From 5f4c7c533a48b0a6854473b6ab939f4e493f7b8c Mon Sep 17 00:00:00 2001 From: Josh Trzebiatowski Date: Wed, 10 Jul 2024 11:29:29 -0500 Subject: [PATCH 01/15] wip: email format options --- docs/usage.rst | 17 +++++ src/check_jsonschema/cli/main_command.py | 17 +++++ src/check_jsonschema/cli/parse_result.py | 1 + src/check_jsonschema/formats/__init__.py | 33 ++++++++- .../formats/implementations/__init__.py | 4 +- .../formats/implementations/rfc5321.py | 43 ++++++++++++ .../formats/implementations/rfc6531.py | 67 +++++++++++++++++++ 7 files changed, 180 insertions(+), 2 deletions(-) create mode 100644 src/check_jsonschema/formats/implementations/rfc5321.py create mode 100644 src/check_jsonschema/formats/implementations/rfc6531.py diff --git a/docs/usage.rst b/docs/usage.rst index 1768c63c0..a92d1a696 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -198,6 +198,23 @@ follows: * - python - Require the regex to be valid in Python regex syntax. +``--format-email`` +~~~~~~~~~~~~~~~~~~ + +Set a mode for handling of the ``"email"`` and ``"idn-email"`` values for ``"format"``. The modes are as +follows: + +.. list-table:: Email Options + :widths: 15 30 + :header-rows: 1 + + * - mode + - description + * - default + - Require the email address to pass a basic sanity check + * - full + - Require the email to match RFC5321 for ``"email"`` or RFC6531 for ``"idn-email"``` + Other Options -------------- diff --git a/src/check_jsonschema/cli/main_command.py b/src/check_jsonschema/cli/main_command.py index 3145019e8..abc37318c 100644 --- a/src/check_jsonschema/cli/main_command.py +++ b/src/check_jsonschema/cli/main_command.py @@ -74,6 +74,11 @@ def pretty_helptext_list(values: list[str] | tuple[str, ...]) -> str: date, date-time, email, ipv4, ipv6, regex, uuid \b +For the "email" and "idn-email" formats, there are multiple modes which can be specified with +'--format-email': + default | only check that the string contains "@" + full | check the string against RFC 5321 (email) or RFC 6531 (idn-email) + For the "regex" format, there are multiple modes which can be specified with '--format-regex': default | check that the string is a valid ECMAScript regex @@ -155,6 +160,16 @@ def pretty_helptext_list(values: list[str] | tuple[str, ...]) -> str: default=RegexVariantName.default.value, type=click.Choice([x.value for x in RegexVariantName], case_sensitive=False), ) +@click.option( + "--format-email", + help=( + "Set the mode of format validation for email addresses. " + "If `--disable-formats email` or `--disable-formats idn-email` is " + "used, this option has no effect on the disabled format." + ), + default=EmailVariantName.default.value, + type=click.Choice([x.value for x in EmailVariantName], case_sensitive=False), +) @click.option( "--default-filetype", help="A default filetype to assume when a file's type is not detected", @@ -240,6 +255,7 @@ def main( no_cache: bool, cache_filename: str | None, disable_formats: tuple[list[str], ...], + format_email: Literal["full", "default"], format_regex: Literal["python", "default"], default_filetype: Literal["json", "yaml", "toml", "json5"], traceback_mode: Literal["full", "short"], @@ -267,6 +283,7 @@ def main( else: args.disable_formats = normalized_disable_formats + args.format_email = RegexVariantName(format_email) args.format_regex = RegexVariantName(format_regex) args.disable_cache = no_cache args.default_filetype = default_filetype diff --git a/src/check_jsonschema/cli/parse_result.py b/src/check_jsonschema/cli/parse_result.py index a317378f9..3dc73fbce 100644 --- a/src/check_jsonschema/cli/parse_result.py +++ b/src/check_jsonschema/cli/parse_result.py @@ -83,6 +83,7 @@ def set_validator( def format_opts(self) -> FormatOptions: return FormatOptions( enabled=not self.disable_all_formats, + email_variant=self.format_email, regex_variant=self.format_regex, disabled_formats=self.disable_formats, ) diff --git a/src/check_jsonschema/formats/__init__.py b/src/check_jsonschema/formats/__init__.py index 8202d9a00..261e15786 100644 --- a/src/check_jsonschema/formats/__init__.py +++ b/src/check_jsonschema/formats/__init__.py @@ -9,7 +9,7 @@ import jsonschema.validators import regress -from .implementations import validate_rfc3339, validate_time +from .implementations import validate_rfc3339, validate_rfc5321, validate_rfc6531, validate_time # all known format strings except for a selection from draft3 which have either # been renamed or removed: @@ -39,6 +39,32 @@ ) +class EmailVariantName(enum.Enum): + default = "default" + full = "full" + + +class EmailImplementation: + def __init__(self, variant: EmailVariantName) -> None: + self.variant = variant + + def check_format_email(self, instance: t.Any) -> bool: + if not isinstance(instance, str): + return True + if self.variant == EmailVariantName.default: + return "@" in instance + else: + return validate_rfc5321(instance) + + def check_format_idn_email(self, instance: t.Any) -> bool: + if not isinstance(instance, str): + return True + if self.variant == EmailVariantName.default: + return "@" in instance + else: + return validate_rfc6531(instance) + + class RegexVariantName(enum.Enum): default = "default" python = "python" @@ -70,10 +96,12 @@ def __init__( self, *, enabled: bool = True, + email_variant: EmailVariantName = EmailVariantName.default, regex_variant: RegexVariantName = RegexVariantName.default, disabled_formats: tuple[str, ...] = (), ) -> None: self.enabled = enabled + self.email_variant = email_variant self.regex_variant = regex_variant self.disabled_formats = disabled_formats @@ -101,7 +129,10 @@ def make_format_checker( # replace the regex check del checker.checkers["regex"] + email_impl = EmailImplementation(opts.email_variant) regex_impl = RegexImplementation(opts.regex_variant) + checker.checks("email")(email_impl.check_format_email) + checker.checks("idn-email")(email_impl.check_format_idn_email) checker.checks("regex")(regex_impl.check_format) checker.checks("date-time")(validate_rfc3339) checker.checks("time")(validate_time) diff --git a/src/check_jsonschema/formats/implementations/__init__.py b/src/check_jsonschema/formats/implementations/__init__.py index 38ac89fe5..5cabca042 100644 --- a/src/check_jsonschema/formats/implementations/__init__.py +++ b/src/check_jsonschema/formats/implementations/__init__.py @@ -1,4 +1,6 @@ from .iso8601_time import validate as validate_time from .rfc3339 import validate as validate_rfc3339 +from .rfc5321 import validate as validate_rfc5321 +from .rfc6531 import validate as validate_rfc6531 -__all__ = ("validate_rfc3339", "validate_time") +__all__ = ("validate_rfc3339", "validate_rfc5321", "validate_rfc6531", "validate_time") diff --git a/src/check_jsonschema/formats/implementations/rfc5321.py b/src/check_jsonschema/formats/implementations/rfc5321.py new file mode 100644 index 000000000..d40e19add --- /dev/null +++ b/src/check_jsonschema/formats/implementations/rfc5321.py @@ -0,0 +1,43 @@ +import re + +RFC5321_REGEX = re.compile( + r""" + ^ + ( + [!#-'*+/-9=?A-Z^-~-]+(\.[!#-'*+/-9=?A-Z^-~-]+)* + | + "([]!#-[^-~ \t]|(\\[\t -~]))+" + ) + @ + ( + [!#-'*+/-9=?A-Z^-~-]+(\.[!#-'*+/-9=?A-Z^-~-]+)* + | + \[[\t -Z^-~]*] + ) + $ +""", + re.VERBOSE | re.ASCII, +) + + +def validate(email_str: object) -> bool: + """Validate a string as a RFC5321 email address.""" + if not isinstance(email_str, str): + return False + return not not RFC5321_REGEX.match(email_str) + + +if __name__ == "__main__": + import timeit + + N = 100_000 + tests = ( + ("basic", "user@example.com"), + ) + + print("benchmarking") + for name, val in tests: + all_times = timeit.repeat( + f"validate({val!r})", globals=globals(), repeat=3, number=N + ) + print(f"{name} (valid={validate(val)}): {int(min(all_times) / N * 10**9)}ns") diff --git a/src/check_jsonschema/formats/implementations/rfc6531.py b/src/check_jsonschema/formats/implementations/rfc6531.py new file mode 100644 index 000000000..2eb0caaf0 --- /dev/null +++ b/src/check_jsonschema/formats/implementations/rfc6531.py @@ -0,0 +1,67 @@ +import re + +RFC6531_REGEX = re.compile( + r""" + ^ + # local part + ( + ([0-9a-z!#$%&'*+-\/=?^_`\{|\}~\u{80}-\u{10FFFF}]+(\.[0-9a-z!#$%&'*+-\/=?^_`\{|\}~\u{80}-\u{10FFFF}]+)*) + | + # quoted string + ("( + [\x20-\x21\x23-\x5B\x5D-\x7E\u{80}-\u{10FFFF}] + | + \\[\x20-\x7E] + )*") + )(? bool: + """Validate a string as a RFC6531 email address.""" + if not isinstance(email_str, str): + return False + return not not RFC6531_REGEX.match(email_str) + + +if __name__ == "__main__": + import timeit + + N = 100_000 + tests = ( + ("basic", "user@example.com"), + ) + + print("benchmarking") + for name, val in tests: + all_times = timeit.repeat( + f"validate({val!r})", globals=globals(), repeat=3, number=N + ) + print(f"{name} (valid={validate(val)}): {int(min(all_times) / N * 10**9)}ns") From e660e169ef0372e8ec26f60a0d6640fb50655596 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 10 Jul 2024 16:35:26 +0000 Subject: [PATCH 02/15] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/check_jsonschema/cli/main_command.py | 4 ++-- src/check_jsonschema/formats/__init__.py | 7 ++++++- .../formats/implementations/rfc5321.py | 6 ++---- .../formats/implementations/rfc6531.py | 20 +++++++------------ 4 files changed, 17 insertions(+), 20 deletions(-) diff --git a/src/check_jsonschema/cli/main_command.py b/src/check_jsonschema/cli/main_command.py index abc37318c..b8a4b7e22 100644 --- a/src/check_jsonschema/cli/main_command.py +++ b/src/check_jsonschema/cli/main_command.py @@ -10,7 +10,7 @@ from ..catalog import CUSTOM_SCHEMA_NAMES, SCHEMA_CATALOG from ..checker import SchemaChecker -from ..formats import KNOWN_FORMATS, RegexVariantName +from ..formats import KNOWN_FORMATS, RegexVariantName, EmailVariantName from ..instance_loader import InstanceLoader from ..parsers import SUPPORTED_FILE_FORMATS from ..reporter import REPORTER_BY_NAME, Reporter @@ -283,7 +283,7 @@ def main( else: args.disable_formats = normalized_disable_formats - args.format_email = RegexVariantName(format_email) + args.format_email = EmailVariantName(format_email) args.format_regex = RegexVariantName(format_regex) args.disable_cache = no_cache args.default_filetype = default_filetype diff --git a/src/check_jsonschema/formats/__init__.py b/src/check_jsonschema/formats/__init__.py index 261e15786..464267422 100644 --- a/src/check_jsonschema/formats/__init__.py +++ b/src/check_jsonschema/formats/__init__.py @@ -9,7 +9,12 @@ import jsonschema.validators import regress -from .implementations import validate_rfc3339, validate_rfc5321, validate_rfc6531, validate_time +from .implementations import ( + validate_rfc3339, + validate_rfc5321, + validate_rfc6531, + validate_time, +) # all known format strings except for a selection from draft3 which have either # been renamed or removed: diff --git a/src/check_jsonschema/formats/implementations/rfc5321.py b/src/check_jsonschema/formats/implementations/rfc5321.py index d40e19add..67a099dbd 100644 --- a/src/check_jsonschema/formats/implementations/rfc5321.py +++ b/src/check_jsonschema/formats/implementations/rfc5321.py @@ -15,7 +15,7 @@ \[[\t -Z^-~]*] ) $ -""", + """, re.VERBOSE | re.ASCII, ) @@ -31,9 +31,7 @@ def validate(email_str: object) -> bool: import timeit N = 100_000 - tests = ( - ("basic", "user@example.com"), - ) + tests = (("basic", "user@example.com"),) print("benchmarking") for name, val in tests: diff --git a/src/check_jsonschema/formats/implementations/rfc6531.py b/src/check_jsonschema/formats/implementations/rfc6531.py index 2eb0caaf0..edfed5033 100644 --- a/src/check_jsonschema/formats/implementations/rfc6531.py +++ b/src/check_jsonschema/formats/implementations/rfc6531.py @@ -5,15 +5,11 @@ ^ # local part ( - ([0-9a-z!#$%&'*+-\/=?^_`\{|\}~\u{80}-\u{10FFFF}]+(\.[0-9a-z!#$%&'*+-\/=?^_`\{|\}~\u{80}-\u{10FFFF}]+)*) + ([0-9a-z!#$%&'*+-\/=?^_`\{|\}~\u0080-\U0010FFFF]+(\.[0-9a-z!#$%&'*+-\/=?^_`\{|\}~\u0080-\U0010FFFF]+)*) | # quoted string - ("( - [\x20-\x21\x23-\x5B\x5D-\x7E\u{80}-\u{10FFFF}] - | - \\[\x20-\x7E] - )*") - )(? bool: import timeit N = 100_000 - tests = ( - ("basic", "user@example.com"), - ) + tests = (("basic", "user@example.com"),) print("benchmarking") for name, val in tests: From 43bef30b587d8fea26059c184a642231e2c1049f Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 10 Jul 2024 17:08:01 +0000 Subject: [PATCH 03/15] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/check_jsonschema/cli/main_command.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/check_jsonschema/cli/main_command.py b/src/check_jsonschema/cli/main_command.py index b8a4b7e22..9b3354a1f 100644 --- a/src/check_jsonschema/cli/main_command.py +++ b/src/check_jsonschema/cli/main_command.py @@ -10,7 +10,7 @@ from ..catalog import CUSTOM_SCHEMA_NAMES, SCHEMA_CATALOG from ..checker import SchemaChecker -from ..formats import KNOWN_FORMATS, RegexVariantName, EmailVariantName +from ..formats import KNOWN_FORMATS, EmailVariantName, RegexVariantName from ..instance_loader import InstanceLoader from ..parsers import SUPPORTED_FILE_FORMATS from ..reporter import REPORTER_BY_NAME, Reporter From 4bfadc1bded97b7ad5b89b53782c6f9b04c71707 Mon Sep 17 00:00:00 2001 From: Joshua Trzebiatowski Date: Wed, 10 Jul 2024 12:18:08 -0500 Subject: [PATCH 04/15] update ParseResult --- src/check_jsonschema/cli/parse_result.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/check_jsonschema/cli/parse_result.py b/src/check_jsonschema/cli/parse_result.py index 3dc73fbce..ce6a675c7 100644 --- a/src/check_jsonschema/cli/parse_result.py +++ b/src/check_jsonschema/cli/parse_result.py @@ -6,7 +6,7 @@ import click import jsonschema -from ..formats import FormatOptions, RegexVariantName +from ..formats import EmailVariantName, FormatOptions, RegexVariantName from ..transforms import Transform @@ -36,6 +36,7 @@ def __init__(self) -> None: # regex format options self.disable_all_formats: bool = False self.disable_formats: tuple[str, ...] = () + self.format_email: EmailVariantName = EmailVariantName.default self.format_regex: RegexVariantName = RegexVariantName.default # error and output controls self.verbosity: int = 1 From 7bd81f8b84001e81297c9d2d1a28b0a10fb6a100 Mon Sep 17 00:00:00 2001 From: Joshua Trzebiatowski Date: Wed, 10 Jul 2024 12:26:58 -0500 Subject: [PATCH 05/15] nested set fix --- src/check_jsonschema/formats/implementations/rfc6531.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/check_jsonschema/formats/implementations/rfc6531.py b/src/check_jsonschema/formats/implementations/rfc6531.py index edfed5033..714f3bdc6 100644 --- a/src/check_jsonschema/formats/implementations/rfc6531.py +++ b/src/check_jsonschema/formats/implementations/rfc6531.py @@ -28,7 +28,7 @@ (IPv6:([0-9a-f]{1,4}(:[0-9a-f]{1,4}){0,3})?::([0-9a-f]{1,4}(:[0-9a-f]{1,4}){0,3}:)?\d{1,3}(\.\d{1,3}){3}) | # General address - ([a-z0-9-]*[[a-z0-9]:[\x21-\x5A\x5E-\x7E]+) + ([a-z0-9-]*[a-z0-9]:[\x21-\x5A\x5E-\x7E]+) )\]) | # Domain From 20f7209ff3c4cf19e141181ecca1c8911c388fe8 Mon Sep 17 00:00:00 2001 From: Joshua Trzebiatowski Date: Wed, 10 Jul 2024 12:29:00 -0500 Subject: [PATCH 06/15] update changelog --- CHANGELOG.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index ec3299036..520e81524 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -9,6 +9,7 @@ Unreleased ---------- .. vendor-insert-here +- Add ``--format-email`` option to allow full validation of email/idn-email formats 0.29.0 ------ From b3df29cd775fb0f20395e3f8e842a87a8a4a1814 Mon Sep 17 00:00:00 2001 From: Joshua Trzebiatowski Date: Wed, 10 Jul 2024 12:38:35 -0500 Subject: [PATCH 07/15] line length --- src/check_jsonschema/cli/main_command.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/check_jsonschema/cli/main_command.py b/src/check_jsonschema/cli/main_command.py index 9b3354a1f..721dd9683 100644 --- a/src/check_jsonschema/cli/main_command.py +++ b/src/check_jsonschema/cli/main_command.py @@ -74,7 +74,7 @@ def pretty_helptext_list(values: list[str] | tuple[str, ...]) -> str: date, date-time, email, ipv4, ipv6, regex, uuid \b -For the "email" and "idn-email" formats, there are multiple modes which can be specified with +For the "email" and "idn-email" formats, there are multiple modes which can be specified '--format-email': default | only check that the string contains "@" full | check the string against RFC 5321 (email) or RFC 6531 (idn-email) From 4bcc1027ee51265604ca39b46a875da30eeb8884 Mon Sep 17 00:00:00 2001 From: Josh Trzebiatowski Date: Thu, 11 Jul 2024 12:03:20 -0500 Subject: [PATCH 08/15] Apply suggestions from code review Co-authored-by: Stephen Rosen --- src/check_jsonschema/formats/implementations/rfc5321.py | 2 +- src/check_jsonschema/formats/implementations/rfc6531.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/check_jsonschema/formats/implementations/rfc5321.py b/src/check_jsonschema/formats/implementations/rfc5321.py index 67a099dbd..ba8e01321 100644 --- a/src/check_jsonschema/formats/implementations/rfc5321.py +++ b/src/check_jsonschema/formats/implementations/rfc5321.py @@ -24,7 +24,7 @@ def validate(email_str: object) -> bool: """Validate a string as a RFC5321 email address.""" if not isinstance(email_str, str): return False - return not not RFC5321_REGEX.match(email_str) + return RFC5321_REGEX.match(email_str) if __name__ == "__main__": diff --git a/src/check_jsonschema/formats/implementations/rfc6531.py b/src/check_jsonschema/formats/implementations/rfc6531.py index 714f3bdc6..ecd5bc4e1 100644 --- a/src/check_jsonschema/formats/implementations/rfc6531.py +++ b/src/check_jsonschema/formats/implementations/rfc6531.py @@ -44,7 +44,7 @@ def validate(email_str: object) -> bool: """Validate a string as a RFC6531 email address.""" if not isinstance(email_str, str): return False - return not not RFC6531_REGEX.match(email_str) + return RFC6531_REGEX.match(email_str) if __name__ == "__main__": From 07d7204a5dcb21827c46a3378db62a1df607021e Mon Sep 17 00:00:00 2001 From: Joshua Trzebiatowski Date: Fri, 26 Jul 2024 17:21:44 -0500 Subject: [PATCH 09/15] removed --format-email CLI option. Refactored RFC5321 validation --- docs/usage.rst | 17 ---------- src/check_jsonschema/cli/main_command.py | 19 +---------- src/check_jsonschema/cli/parse_result.py | 4 +-- src/check_jsonschema/formats/__init__.py | 24 +++----------- .../formats/implementations/rfc5321.py | 33 ++++++++++++++++--- 5 files changed, 35 insertions(+), 62 deletions(-) diff --git a/docs/usage.rst b/docs/usage.rst index a92d1a696..1768c63c0 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -198,23 +198,6 @@ follows: * - python - Require the regex to be valid in Python regex syntax. -``--format-email`` -~~~~~~~~~~~~~~~~~~ - -Set a mode for handling of the ``"email"`` and ``"idn-email"`` values for ``"format"``. The modes are as -follows: - -.. list-table:: Email Options - :widths: 15 30 - :header-rows: 1 - - * - mode - - description - * - default - - Require the email address to pass a basic sanity check - * - full - - Require the email to match RFC5321 for ``"email"`` or RFC6531 for ``"idn-email"``` - Other Options -------------- diff --git a/src/check_jsonschema/cli/main_command.py b/src/check_jsonschema/cli/main_command.py index 721dd9683..3145019e8 100644 --- a/src/check_jsonschema/cli/main_command.py +++ b/src/check_jsonschema/cli/main_command.py @@ -10,7 +10,7 @@ from ..catalog import CUSTOM_SCHEMA_NAMES, SCHEMA_CATALOG from ..checker import SchemaChecker -from ..formats import KNOWN_FORMATS, EmailVariantName, RegexVariantName +from ..formats import KNOWN_FORMATS, RegexVariantName from ..instance_loader import InstanceLoader from ..parsers import SUPPORTED_FILE_FORMATS from ..reporter import REPORTER_BY_NAME, Reporter @@ -74,11 +74,6 @@ def pretty_helptext_list(values: list[str] | tuple[str, ...]) -> str: date, date-time, email, ipv4, ipv6, regex, uuid \b -For the "email" and "idn-email" formats, there are multiple modes which can be specified -'--format-email': - default | only check that the string contains "@" - full | check the string against RFC 5321 (email) or RFC 6531 (idn-email) - For the "regex" format, there are multiple modes which can be specified with '--format-regex': default | check that the string is a valid ECMAScript regex @@ -160,16 +155,6 @@ def pretty_helptext_list(values: list[str] | tuple[str, ...]) -> str: default=RegexVariantName.default.value, type=click.Choice([x.value for x in RegexVariantName], case_sensitive=False), ) -@click.option( - "--format-email", - help=( - "Set the mode of format validation for email addresses. " - "If `--disable-formats email` or `--disable-formats idn-email` is " - "used, this option has no effect on the disabled format." - ), - default=EmailVariantName.default.value, - type=click.Choice([x.value for x in EmailVariantName], case_sensitive=False), -) @click.option( "--default-filetype", help="A default filetype to assume when a file's type is not detected", @@ -255,7 +240,6 @@ def main( no_cache: bool, cache_filename: str | None, disable_formats: tuple[list[str], ...], - format_email: Literal["full", "default"], format_regex: Literal["python", "default"], default_filetype: Literal["json", "yaml", "toml", "json5"], traceback_mode: Literal["full", "short"], @@ -283,7 +267,6 @@ def main( else: args.disable_formats = normalized_disable_formats - args.format_email = EmailVariantName(format_email) args.format_regex = RegexVariantName(format_regex) args.disable_cache = no_cache args.default_filetype = default_filetype diff --git a/src/check_jsonschema/cli/parse_result.py b/src/check_jsonschema/cli/parse_result.py index ce6a675c7..a317378f9 100644 --- a/src/check_jsonschema/cli/parse_result.py +++ b/src/check_jsonschema/cli/parse_result.py @@ -6,7 +6,7 @@ import click import jsonschema -from ..formats import EmailVariantName, FormatOptions, RegexVariantName +from ..formats import FormatOptions, RegexVariantName from ..transforms import Transform @@ -36,7 +36,6 @@ def __init__(self) -> None: # regex format options self.disable_all_formats: bool = False self.disable_formats: tuple[str, ...] = () - self.format_email: EmailVariantName = EmailVariantName.default self.format_regex: RegexVariantName = RegexVariantName.default # error and output controls self.verbosity: int = 1 @@ -84,7 +83,6 @@ def set_validator( def format_opts(self) -> FormatOptions: return FormatOptions( enabled=not self.disable_all_formats, - email_variant=self.format_email, regex_variant=self.format_regex, disabled_formats=self.disable_formats, ) diff --git a/src/check_jsonschema/formats/__init__.py b/src/check_jsonschema/formats/__init__.py index 464267422..91a828f7a 100644 --- a/src/check_jsonschema/formats/__init__.py +++ b/src/check_jsonschema/formats/__init__.py @@ -43,31 +43,19 @@ "uuid", ) - -class EmailVariantName(enum.Enum): - default = "default" - full = "full" - - class EmailImplementation: - def __init__(self, variant: EmailVariantName) -> None: - self.variant = variant + def __init__(self) -> None: + pass def check_format_email(self, instance: t.Any) -> bool: if not isinstance(instance, str): return True - if self.variant == EmailVariantName.default: - return "@" in instance - else: - return validate_rfc5321(instance) + return validate_rfc5321(instance) def check_format_idn_email(self, instance: t.Any) -> bool: if not isinstance(instance, str): return True - if self.variant == EmailVariantName.default: - return "@" in instance - else: - return validate_rfc6531(instance) + return validate_rfc6531(instance) class RegexVariantName(enum.Enum): @@ -101,12 +89,10 @@ def __init__( self, *, enabled: bool = True, - email_variant: EmailVariantName = EmailVariantName.default, regex_variant: RegexVariantName = RegexVariantName.default, disabled_formats: tuple[str, ...] = (), ) -> None: self.enabled = enabled - self.email_variant = email_variant self.regex_variant = regex_variant self.disabled_formats = disabled_formats @@ -134,7 +120,7 @@ def make_format_checker( # replace the regex check del checker.checkers["regex"] - email_impl = EmailImplementation(opts.email_variant) + email_impl = EmailImplementation() regex_impl = RegexImplementation(opts.regex_variant) checker.checks("email")(email_impl.check_format_email) checker.checks("idn-email")(email_impl.check_format_idn_email) diff --git a/src/check_jsonschema/formats/implementations/rfc5321.py b/src/check_jsonschema/formats/implementations/rfc5321.py index ba8e01321..d09de2d2a 100644 --- a/src/check_jsonschema/formats/implementations/rfc5321.py +++ b/src/check_jsonschema/formats/implementations/rfc5321.py @@ -1,18 +1,26 @@ import re +# ([!#-'*+/-9=?A-Z^-~-]+(\.[!#-'*+/-9=?A-Z^-~-]+)*|"([]!#-[^-~ \t]|(\\[\t -~]))+") +# @ +# ([!#-'*+/-9=?A-Z^-~-]+(\.[!#-'*+/-9=?A-Z^-~-]+)*|\[[\t -Z^-~]*]) +# +# [a-zA-Z0-9!#$%&'*+/=?^_`{|}~-] == Alphanumeric characters and most special characters except [ (),.:;<>@\[\]\t] +# [a-zA-Z0-9 !#$%&'()*+,./:;<=>?@\[\]^_`{|}~\t-] == All printable characters except for " and \ +# [\t -~] == All printable characters +# [a-zA-Z0-9 !"#$%&'()*+,./:;<=>?@^_`{|}~\t-] == All printable characters except for the following characters []\ RFC5321_REGEX = re.compile( r""" ^ ( - [!#-'*+/-9=?A-Z^-~-]+(\.[!#-'*+/-9=?A-Z^-~-]+)* + [a-zA-Z0-9!#$%&'*+/=?^_`{|}~-]+(\.[a-zA-Z0-9!#$%&'*+/=?^_`{|}~-]+)* | - "([]!#-[^-~ \t]|(\\[\t -~]))+" + "([a-zA-Z0-9 !#$%&'()*+,./:;<=>?@\[\]^_`{|}~\t-]|\\[\t -~])+" ) @ ( - [!#-'*+/-9=?A-Z^-~-]+(\.[!#-'*+/-9=?A-Z^-~-]+)* + [a-zA-Z0-9!#$%&'*+/=?^_`{|}~-]+(\.[a-zA-Z0-9!#$%&'*+/=?^_`{|}~-]+)* | - \[[\t -Z^-~]*] + \[[a-zA-Z0-9 !"#$%&'()*+,./:;<=>?@^_`{|}~\t-]*\] ) $ """, @@ -24,7 +32,22 @@ def validate(email_str: object) -> bool: """Validate a string as a RFC5321 email address.""" if not isinstance(email_str, str): return False - return RFC5321_REGEX.match(email_str) + match = RFC5321_REGEX.match(email_str) + if not match: + return False + # Local part of email address is limited to 64 octets + local = str(match.groups()[0]) + if len(local) > 64: + return False + # Domain names are limited to 253 octets + domain = str(match.groups()[3]) + if len(domain) > 253: + return False + for domain_part in domain.split('.'): + # DNS Labels are limited to 63 octets + if len(domain_part) > 63: + return False + return True if __name__ == "__main__": From 8bc19e10683b8a9f4ae3d8f4fbcc2a67151d6a20 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 26 Jul 2024 22:22:10 +0000 Subject: [PATCH 10/15] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/check_jsonschema/formats/__init__.py | 1 + src/check_jsonschema/formats/implementations/rfc5321.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/check_jsonschema/formats/__init__.py b/src/check_jsonschema/formats/__init__.py index 91a828f7a..f9c6fa90a 100644 --- a/src/check_jsonschema/formats/__init__.py +++ b/src/check_jsonschema/formats/__init__.py @@ -43,6 +43,7 @@ "uuid", ) + class EmailImplementation: def __init__(self) -> None: pass diff --git a/src/check_jsonschema/formats/implementations/rfc5321.py b/src/check_jsonschema/formats/implementations/rfc5321.py index d09de2d2a..7c833d463 100644 --- a/src/check_jsonschema/formats/implementations/rfc5321.py +++ b/src/check_jsonschema/formats/implementations/rfc5321.py @@ -43,7 +43,7 @@ def validate(email_str: object) -> bool: domain = str(match.groups()[3]) if len(domain) > 253: return False - for domain_part in domain.split('.'): + for domain_part in domain.split("."): # DNS Labels are limited to 63 octets if len(domain_part) > 63: return False From 0befdf465cab87d734ca8da3119e9889a84aeb04 Mon Sep 17 00:00:00 2001 From: Joshua Trzebiatowski Date: Mon, 29 Jul 2024 17:24:12 -0500 Subject: [PATCH 11/15] add named match groups and acceptance tests --- .../formats/implementations/rfc5321.py | 13 +- tests/acceptance/test_format_email.py | 111 ++++++++++++++++++ 2 files changed, 117 insertions(+), 7 deletions(-) create mode 100644 tests/acceptance/test_format_email.py diff --git a/src/check_jsonschema/formats/implementations/rfc5321.py b/src/check_jsonschema/formats/implementations/rfc5321.py index 7c833d463..3278b21fe 100644 --- a/src/check_jsonschema/formats/implementations/rfc5321.py +++ b/src/check_jsonschema/formats/implementations/rfc5321.py @@ -11,14 +11,14 @@ RFC5321_REGEX = re.compile( r""" ^ - ( - [a-zA-Z0-9!#$%&'*+/=?^_`{|}~-]+(\.[a-zA-Z0-9!#$%&'*+/=?^_`{|}~-]+)* + (?P + [a-zA-Z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-zA-Z0-9!#$%&'*+/=?^_`{|}~-]+)* | - "([a-zA-Z0-9 !#$%&'()*+,./:;<=>?@\[\]^_`{|}~\t-]|\\[\t -~])+" + "(?:[a-zA-Z0-9 !#$%&'()*+,./:;<=>?@\[\]^_`{|}~\t-]|\\[\t -~])+" ) @ - ( - [a-zA-Z0-9!#$%&'*+/=?^_`{|}~-]+(\.[a-zA-Z0-9!#$%&'*+/=?^_`{|}~-]+)* + (?P + [a-zA-Z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-zA-Z0-9!#$%&'*+/=?^_`{|}~-]+)* | \[[a-zA-Z0-9 !"#$%&'()*+,./:;<=>?@^_`{|}~\t-]*\] ) @@ -35,12 +35,11 @@ def validate(email_str: object) -> bool: match = RFC5321_REGEX.match(email_str) if not match: return False + local, domain = match.group('local', 'domain') # Local part of email address is limited to 64 octets - local = str(match.groups()[0]) if len(local) > 64: return False # Domain names are limited to 253 octets - domain = str(match.groups()[3]) if len(domain) > 253: return False for domain_part in domain.split("."): diff --git a/tests/acceptance/test_format_email.py b/tests/acceptance/test_format_email.py new file mode 100644 index 000000000..03482867d --- /dev/null +++ b/tests/acceptance/test_format_email.py @@ -0,0 +1,111 @@ +# Test email strings for validity +import json + +import pytest + +FORMAT_SCHEMA_EMAIL = { + "$schema": "http://json-schema.org/draft-07/schema", + "properties": {"email": {"type": "string", "format": "email"}}, +} +FORMAT_SCHEMA_IDN_EMAIL = { + "$schema": "http://json-schema.org/draft-07/schema", + "properties": {"email": {"type": "string", "format": "idn-email"}}, +} + +ALWAYS_PASSING_EMAILS = [ + {'email': r'simple@example.com'}, + {'email': r'very.common@example.com'}, + {'email': r'FirstName.LastName@EasierReading.org'}, + {'email': r'x@example.com'}, + {'email': r'long.email-address-with-hyphens@and.subdomains.example.com'}, + {'email': r'user.name+tag+sorting@example.com'}, + {'email': r'name/surname@example.com'}, + {'email': r'admin@example'}, + {'email': r'example@s.example'}, + {'email': r'" "@example.org'}, + {'email': r'"john..doe"@example.org'}, + {'email': r'mailhost!username@example.org'}, + {'email': r'"very.(),:;<>[]\".VERY.\"very@\\ \"very\".unusual"@strange.example.com'}, + {'email': r'user%example.com@example.org'}, + {'email': r'user-@example.org'}, + {'email': r'postmaster@[123.123.123.123]'}, + {'email': r'postmaster@[IPv6:2001:0db8:85a3:0000:0000:8a2e:0370:7334]'}, + {'email': r'_test@[IPv6:2001:0db8:85a3:0000:0000:8a2e:0370:7334]'}, +] + +IDN_ONLY_EMAILS = [ + {'email': r'I❤️CHOCOLATE@example.com'}, +] + +ALWAYS_FAILING_EMAILS = [ + {'email': r'abc.example.com'}, + {'email': r'a@b@c@example.com'}, + {'email': r'a"b(c)d,e:f;gi[j\k]l@example.com'}, + {'email': r'just"not"right@example.com'}, + {'email': r'this is"not\allowed@example.com'}, + {'email': r'this\ still\"not\\allowed@example.com'}, + {'email': r'1234567890123456789012345678901234567890123456789012345678901234+x@example.com'}, + {'email': r'i.like.underscores@but_they_are_not_allowed_in_this_part'}, + {'email': r'trythis@123456789012345678901234567890123456789012345678901234567890123456.com'}, + {'email': r'another@12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234.com'}, +] + +def test_email_format_good(run_line, tmp_path): + schemafile = tmp_path / "schema.json" + schemafile.write_text(json.dumps(FORMAT_SCHEMA_EMAIL)) + + for idx, email_doc in enumerate(ALWAYS_PASSING_EMAILS): + doc = tmp_path / f"doc{idx}.json" + doc.write_text(json.dumps(email_doc)) + res = run_line( [ + "check-jsonschema", + "--schemafile", + str(schemafile), + str(doc), + ],) + assert (email_doc["email"], res.exit_code) == (email_doc["email"], 0) + +def test_email_format_bad(run_line, tmp_path): + schemafile = tmp_path / "schema.json" + schemafile.write_text(json.dumps(FORMAT_SCHEMA_EMAIL)) + + for idx, email_doc in enumerate(ALWAYS_FAILING_EMAILS + IDN_ONLY_EMAILS): + doc = tmp_path / f"doc{idx}.json" + doc.write_text(json.dumps(email_doc)) + res = run_line( [ + "check-jsonschema", + "--schemafile", + str(schemafile), + str(doc), + ],) + assert (email_doc["email"], res.exit_code) != (email_doc["email"], 0) + +def test_idn_email_format_good(run_line, tmp_path): + schemafile = tmp_path / "schema.json" + schemafile.write_text(json.dumps(FORMAT_SCHEMA_IDN_EMAIL)) + + for idx, email_doc in enumerate(ALWAYS_PASSING_EMAILS + IDN_ONLY_EMAILS): + doc = tmp_path / f"doc{idx}.json" + doc.write_text(json.dumps(email_doc)) + res = run_line( [ + "check-jsonschema", + "--schemafile", + str(schemafile), + str(doc), + ],) + assert (email_doc["email"], res.exit_code) == (email_doc["email"], 0) + +def test_idn_email_format_bad(run_line, tmp_path): + schemafile = tmp_path / "schema.json" + schemafile.write_text(json.dumps(FORMAT_SCHEMA_IDN_EMAIL)) + + for idx, email_doc in enumerate(ALWAYS_FAILING_EMAILS): + doc = tmp_path / f"doc{idx}.json" + doc.write_text(json.dumps(email_doc)) + res = run_line( [ + "check-jsonschema", + "--schemafile", + str(schemafile), + str(doc), + ],) + assert (email_doc["email"], res.exit_code) != (email_doc["email"], 0) From 03686fd3ffea7e28ccaa1ad87cad25fab972f718 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 29 Jul 2024 22:24:51 +0000 Subject: [PATCH 12/15] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../formats/implementations/rfc5321.py | 2 +- tests/acceptance/test_format_email.py | 150 ++++++++++-------- 2 files changed, 86 insertions(+), 66 deletions(-) diff --git a/src/check_jsonschema/formats/implementations/rfc5321.py b/src/check_jsonschema/formats/implementations/rfc5321.py index 3278b21fe..42691c807 100644 --- a/src/check_jsonschema/formats/implementations/rfc5321.py +++ b/src/check_jsonschema/formats/implementations/rfc5321.py @@ -35,7 +35,7 @@ def validate(email_str: object) -> bool: match = RFC5321_REGEX.match(email_str) if not match: return False - local, domain = match.group('local', 'domain') + local, domain = match.group("local", "domain") # Local part of email address is limited to 64 octets if len(local) > 64: return False diff --git a/tests/acceptance/test_format_email.py b/tests/acceptance/test_format_email.py index 03482867d..fdaa3ec52 100644 --- a/tests/acceptance/test_format_email.py +++ b/tests/acceptance/test_format_email.py @@ -13,99 +13,119 @@ } ALWAYS_PASSING_EMAILS = [ - {'email': r'simple@example.com'}, - {'email': r'very.common@example.com'}, - {'email': r'FirstName.LastName@EasierReading.org'}, - {'email': r'x@example.com'}, - {'email': r'long.email-address-with-hyphens@and.subdomains.example.com'}, - {'email': r'user.name+tag+sorting@example.com'}, - {'email': r'name/surname@example.com'}, - {'email': r'admin@example'}, - {'email': r'example@s.example'}, - {'email': r'" "@example.org'}, - {'email': r'"john..doe"@example.org'}, - {'email': r'mailhost!username@example.org'}, - {'email': r'"very.(),:;<>[]\".VERY.\"very@\\ \"very\".unusual"@strange.example.com'}, - {'email': r'user%example.com@example.org'}, - {'email': r'user-@example.org'}, - {'email': r'postmaster@[123.123.123.123]'}, - {'email': r'postmaster@[IPv6:2001:0db8:85a3:0000:0000:8a2e:0370:7334]'}, - {'email': r'_test@[IPv6:2001:0db8:85a3:0000:0000:8a2e:0370:7334]'}, + {"email": r"simple@example.com"}, + {"email": r"very.common@example.com"}, + {"email": r"FirstName.LastName@EasierReading.org"}, + {"email": r"x@example.com"}, + {"email": r"long.email-address-with-hyphens@and.subdomains.example.com"}, + {"email": r"user.name+tag+sorting@example.com"}, + {"email": r"name/surname@example.com"}, + {"email": r"admin@example"}, + {"email": r"example@s.example"}, + {"email": r'" "@example.org'}, + {"email": r'"john..doe"@example.org'}, + {"email": r"mailhost!username@example.org"}, + { + "email": r'"very.(),:;<>[]\".VERY.\"very@\\ \"very\".unusual"@strange.example.com' + }, + {"email": r"user%example.com@example.org"}, + {"email": r"user-@example.org"}, + {"email": r"postmaster@[123.123.123.123]"}, + {"email": r"postmaster@[IPv6:2001:0db8:85a3:0000:0000:8a2e:0370:7334]"}, + {"email": r"_test@[IPv6:2001:0db8:85a3:0000:0000:8a2e:0370:7334]"}, ] IDN_ONLY_EMAILS = [ - {'email': r'I❤️CHOCOLATE@example.com'}, + {"email": r"I❤️CHOCOLATE@example.com"}, ] ALWAYS_FAILING_EMAILS = [ - {'email': r'abc.example.com'}, - {'email': r'a@b@c@example.com'}, - {'email': r'a"b(c)d,e:f;gi[j\k]l@example.com'}, - {'email': r'just"not"right@example.com'}, - {'email': r'this is"not\allowed@example.com'}, - {'email': r'this\ still\"not\\allowed@example.com'}, - {'email': r'1234567890123456789012345678901234567890123456789012345678901234+x@example.com'}, - {'email': r'i.like.underscores@but_they_are_not_allowed_in_this_part'}, - {'email': r'trythis@123456789012345678901234567890123456789012345678901234567890123456.com'}, - {'email': r'another@12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234.com'}, + {"email": r"abc.example.com"}, + {"email": r"a@b@c@example.com"}, + {"email": r'a"b(c)d,e:f;gi[j\k]l@example.com'}, + {"email": r'just"not"right@example.com'}, + {"email": r'this is"not\allowed@example.com'}, + {"email": r"this\ still\"not\\allowed@example.com"}, + { + "email": r"1234567890123456789012345678901234567890123456789012345678901234+x@example.com" + }, + {"email": r"i.like.underscores@but_they_are_not_allowed_in_this_part"}, + { + "email": r"trythis@123456789012345678901234567890123456789012345678901234567890123456.com" + }, + { + "email": r"another@12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234.com" + }, ] + def test_email_format_good(run_line, tmp_path): schemafile = tmp_path / "schema.json" schemafile.write_text(json.dumps(FORMAT_SCHEMA_EMAIL)) for idx, email_doc in enumerate(ALWAYS_PASSING_EMAILS): - doc = tmp_path / f"doc{idx}.json" - doc.write_text(json.dumps(email_doc)) - res = run_line( [ - "check-jsonschema", - "--schemafile", - str(schemafile), - str(doc), - ],) - assert (email_doc["email"], res.exit_code) == (email_doc["email"], 0) + doc = tmp_path / f"doc{idx}.json" + doc.write_text(json.dumps(email_doc)) + res = run_line( + [ + "check-jsonschema", + "--schemafile", + str(schemafile), + str(doc), + ], + ) + assert (email_doc["email"], res.exit_code) == (email_doc["email"], 0) + def test_email_format_bad(run_line, tmp_path): schemafile = tmp_path / "schema.json" schemafile.write_text(json.dumps(FORMAT_SCHEMA_EMAIL)) for idx, email_doc in enumerate(ALWAYS_FAILING_EMAILS + IDN_ONLY_EMAILS): - doc = tmp_path / f"doc{idx}.json" - doc.write_text(json.dumps(email_doc)) - res = run_line( [ - "check-jsonschema", - "--schemafile", - str(schemafile), - str(doc), - ],) - assert (email_doc["email"], res.exit_code) != (email_doc["email"], 0) + doc = tmp_path / f"doc{idx}.json" + doc.write_text(json.dumps(email_doc)) + res = run_line( + [ + "check-jsonschema", + "--schemafile", + str(schemafile), + str(doc), + ], + ) + assert (email_doc["email"], res.exit_code) != (email_doc["email"], 0) + def test_idn_email_format_good(run_line, tmp_path): schemafile = tmp_path / "schema.json" schemafile.write_text(json.dumps(FORMAT_SCHEMA_IDN_EMAIL)) for idx, email_doc in enumerate(ALWAYS_PASSING_EMAILS + IDN_ONLY_EMAILS): - doc = tmp_path / f"doc{idx}.json" - doc.write_text(json.dumps(email_doc)) - res = run_line( [ - "check-jsonschema", - "--schemafile", - str(schemafile), - str(doc), - ],) - assert (email_doc["email"], res.exit_code) == (email_doc["email"], 0) + doc = tmp_path / f"doc{idx}.json" + doc.write_text(json.dumps(email_doc)) + res = run_line( + [ + "check-jsonschema", + "--schemafile", + str(schemafile), + str(doc), + ], + ) + assert (email_doc["email"], res.exit_code) == (email_doc["email"], 0) + def test_idn_email_format_bad(run_line, tmp_path): schemafile = tmp_path / "schema.json" schemafile.write_text(json.dumps(FORMAT_SCHEMA_IDN_EMAIL)) for idx, email_doc in enumerate(ALWAYS_FAILING_EMAILS): - doc = tmp_path / f"doc{idx}.json" - doc.write_text(json.dumps(email_doc)) - res = run_line( [ - "check-jsonschema", - "--schemafile", - str(schemafile), - str(doc), - ],) - assert (email_doc["email"], res.exit_code) != (email_doc["email"], 0) + doc = tmp_path / f"doc{idx}.json" + doc.write_text(json.dumps(email_doc)) + res = run_line( + [ + "check-jsonschema", + "--schemafile", + str(schemafile), + str(doc), + ], + ) + assert (email_doc["email"], res.exit_code) != (email_doc["email"], 0) From aef364712a6bd87eb6354643bda684c398af3884 Mon Sep 17 00:00:00 2001 From: Josh Trzebiatowski Date: Tue, 30 Jul 2024 10:26:30 -0500 Subject: [PATCH 13/15] Update test_format_email.py Add more idn email tests --- tests/acceptance/test_format_email.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tests/acceptance/test_format_email.py b/tests/acceptance/test_format_email.py index fdaa3ec52..9a58d70dc 100644 --- a/tests/acceptance/test_format_email.py +++ b/tests/acceptance/test_format_email.py @@ -37,6 +37,13 @@ IDN_ONLY_EMAILS = [ {"email": r"I❤️CHOCOLATE@example.com"}, + {"email": r"用户@例子.广告"}, + {"email": r"ಬೆಂಬಲ@ಡೇಟಾಮೇಲ್.ಭಾರತ"}, + {"email": r"अजय@डाटा.भारत"}, + {"email": r"квіточка@пошта.укр"}, + {"email": r"χρήστης@παράδειγμα.ελ"}, + {"email": r"Dörte@Sörensen.example.com"}, + {"email": r"коля@пример.рф"}, ] ALWAYS_FAILING_EMAILS = [ From 9d9a26621763b65f40e550054ff00b9859058893 Mon Sep 17 00:00:00 2001 From: Joshua Trzebiatowski Date: Tue, 30 Jul 2024 11:40:13 -0500 Subject: [PATCH 14/15] convert email acceptance tests to unit tests --- tests/acceptance/test_format_email.py | 138 -------------------------- tests/unit/formats/test_rfc5321.py | 57 +++++++++++ tests/unit/formats/test_rfc6531.py | 57 +++++++++++ 3 files changed, 114 insertions(+), 138 deletions(-) delete mode 100644 tests/acceptance/test_format_email.py create mode 100644 tests/unit/formats/test_rfc5321.py create mode 100644 tests/unit/formats/test_rfc6531.py diff --git a/tests/acceptance/test_format_email.py b/tests/acceptance/test_format_email.py deleted file mode 100644 index 9a58d70dc..000000000 --- a/tests/acceptance/test_format_email.py +++ /dev/null @@ -1,138 +0,0 @@ -# Test email strings for validity -import json - -import pytest - -FORMAT_SCHEMA_EMAIL = { - "$schema": "http://json-schema.org/draft-07/schema", - "properties": {"email": {"type": "string", "format": "email"}}, -} -FORMAT_SCHEMA_IDN_EMAIL = { - "$schema": "http://json-schema.org/draft-07/schema", - "properties": {"email": {"type": "string", "format": "idn-email"}}, -} - -ALWAYS_PASSING_EMAILS = [ - {"email": r"simple@example.com"}, - {"email": r"very.common@example.com"}, - {"email": r"FirstName.LastName@EasierReading.org"}, - {"email": r"x@example.com"}, - {"email": r"long.email-address-with-hyphens@and.subdomains.example.com"}, - {"email": r"user.name+tag+sorting@example.com"}, - {"email": r"name/surname@example.com"}, - {"email": r"admin@example"}, - {"email": r"example@s.example"}, - {"email": r'" "@example.org'}, - {"email": r'"john..doe"@example.org'}, - {"email": r"mailhost!username@example.org"}, - { - "email": r'"very.(),:;<>[]\".VERY.\"very@\\ \"very\".unusual"@strange.example.com' - }, - {"email": r"user%example.com@example.org"}, - {"email": r"user-@example.org"}, - {"email": r"postmaster@[123.123.123.123]"}, - {"email": r"postmaster@[IPv6:2001:0db8:85a3:0000:0000:8a2e:0370:7334]"}, - {"email": r"_test@[IPv6:2001:0db8:85a3:0000:0000:8a2e:0370:7334]"}, -] - -IDN_ONLY_EMAILS = [ - {"email": r"I❤️CHOCOLATE@example.com"}, - {"email": r"用户@例子.广告"}, - {"email": r"ಬೆಂಬಲ@ಡೇಟಾಮೇಲ್.ಭಾರತ"}, - {"email": r"अजय@डाटा.भारत"}, - {"email": r"квіточка@пошта.укр"}, - {"email": r"χρήστης@παράδειγμα.ελ"}, - {"email": r"Dörte@Sörensen.example.com"}, - {"email": r"коля@пример.рф"}, -] - -ALWAYS_FAILING_EMAILS = [ - {"email": r"abc.example.com"}, - {"email": r"a@b@c@example.com"}, - {"email": r'a"b(c)d,e:f;gi[j\k]l@example.com'}, - {"email": r'just"not"right@example.com'}, - {"email": r'this is"not\allowed@example.com'}, - {"email": r"this\ still\"not\\allowed@example.com"}, - { - "email": r"1234567890123456789012345678901234567890123456789012345678901234+x@example.com" - }, - {"email": r"i.like.underscores@but_they_are_not_allowed_in_this_part"}, - { - "email": r"trythis@123456789012345678901234567890123456789012345678901234567890123456.com" - }, - { - "email": r"another@12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234.com" - }, -] - - -def test_email_format_good(run_line, tmp_path): - schemafile = tmp_path / "schema.json" - schemafile.write_text(json.dumps(FORMAT_SCHEMA_EMAIL)) - - for idx, email_doc in enumerate(ALWAYS_PASSING_EMAILS): - doc = tmp_path / f"doc{idx}.json" - doc.write_text(json.dumps(email_doc)) - res = run_line( - [ - "check-jsonschema", - "--schemafile", - str(schemafile), - str(doc), - ], - ) - assert (email_doc["email"], res.exit_code) == (email_doc["email"], 0) - - -def test_email_format_bad(run_line, tmp_path): - schemafile = tmp_path / "schema.json" - schemafile.write_text(json.dumps(FORMAT_SCHEMA_EMAIL)) - - for idx, email_doc in enumerate(ALWAYS_FAILING_EMAILS + IDN_ONLY_EMAILS): - doc = tmp_path / f"doc{idx}.json" - doc.write_text(json.dumps(email_doc)) - res = run_line( - [ - "check-jsonschema", - "--schemafile", - str(schemafile), - str(doc), - ], - ) - assert (email_doc["email"], res.exit_code) != (email_doc["email"], 0) - - -def test_idn_email_format_good(run_line, tmp_path): - schemafile = tmp_path / "schema.json" - schemafile.write_text(json.dumps(FORMAT_SCHEMA_IDN_EMAIL)) - - for idx, email_doc in enumerate(ALWAYS_PASSING_EMAILS + IDN_ONLY_EMAILS): - doc = tmp_path / f"doc{idx}.json" - doc.write_text(json.dumps(email_doc)) - res = run_line( - [ - "check-jsonschema", - "--schemafile", - str(schemafile), - str(doc), - ], - ) - assert (email_doc["email"], res.exit_code) == (email_doc["email"], 0) - - -def test_idn_email_format_bad(run_line, tmp_path): - schemafile = tmp_path / "schema.json" - schemafile.write_text(json.dumps(FORMAT_SCHEMA_IDN_EMAIL)) - - for idx, email_doc in enumerate(ALWAYS_FAILING_EMAILS): - doc = tmp_path / f"doc{idx}.json" - doc.write_text(json.dumps(email_doc)) - res = run_line( - [ - "check-jsonschema", - "--schemafile", - str(schemafile), - str(doc), - ], - ) - assert (email_doc["email"], res.exit_code) != (email_doc["email"], 0) diff --git a/tests/unit/formats/test_rfc5321.py b/tests/unit/formats/test_rfc5321.py new file mode 100644 index 000000000..33722d7cf --- /dev/null +++ b/tests/unit/formats/test_rfc5321.py @@ -0,0 +1,57 @@ +import pytest + +from check_jsonschema.formats.implementations.rfc5321 import validate + + +@pytest.mark.parametrize( + "emailstr", + ( + r"simple@example.com", + r"very.common@example.com", + r"FirstName.LastName@EasierReading.org", + r"x@example.com", + r"long.email-address-with-hyphens@and.subdomains.example.com", + r"user.name+tag+sorting@example.com", + r"name/surname@example.com", + r"admin@example", + r"example@s.example", + r'" "@example.org', + r'"john..doe"@example.org', + r"mailhost!username@example.org", + r'"very.(),:;<>[]\".VERY.\"very@\\ \"very\".unusual"@strange.example.com', + r"user%example.com@example.org", + r"user-@example.org", + r"postmaster@[123.123.123.123]", + r"postmaster@[IPv6:2001:0db8:85a3:0000:0000:8a2e:0370:7334]", + r"_test@[IPv6:2001:0db8:85a3:0000:0000:8a2e:0370:7334]", + ), +) +def test_simple_positive_cases(emailstr): + assert validate(emailstr) + + +@pytest.mark.parametrize( + "emailstr", + ( + r"I❤️CHOCOLATE@example.com", + r"用户@例子.广告", + r"ಬೆಂಬಲ@ಡೇಟಾಮೇಲ್.ಭಾರತ", + r"अजय@डाटा.भारत", + r"квіточка@пошта.укр", + r"χρήστης@παράδειγμα.ελ", + r"Dörte@Sörensen.example.com", + r"коля@пример.рф", + r"abc.example.com", + r"a@b@c@example.com", + r'a"b(c)d,e:f;gi[j\k]l@example.com', + r'just"not"right@example.com', + r'this is"not\allowed@example.com', + r"this\ still\"not\\allowed@example.com", + r"1234567890123456789012345678901234567890123456789012345678901234+x@example.com", + r"i.like.underscores@but_they_are_not_allowed_in_this_part", + r"trythis@123456789012345678901234567890123456789012345678901234567890123456.com", + r"another@12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234.com", + ), +) +def test_simple_negative_case(emailstr): + assert not validate(emailstr) diff --git a/tests/unit/formats/test_rfc6531.py b/tests/unit/formats/test_rfc6531.py new file mode 100644 index 000000000..2521d9f8c --- /dev/null +++ b/tests/unit/formats/test_rfc6531.py @@ -0,0 +1,57 @@ +import pytest + +from check_jsonschema.formats.implementations.rfc6531 import validate + + +@pytest.mark.parametrize( + "emailstr", + ( + r"simple@example.com", + r"very.common@example.com", + r"FirstName.LastName@EasierReading.org", + r"x@example.com", + r"long.email-address-with-hyphens@and.subdomains.example.com", + r"user.name+tag+sorting@example.com", + r"name/surname@example.com", + r"admin@example", + r"example@s.example", + r'" "@example.org', + r'"john..doe"@example.org', + r"mailhost!username@example.org", + r'"very.(),:;<>[]\".VERY.\"very@\\ \"very\".unusual"@strange.example.com' + r"user%example.com@example.org", + r"user-@example.org", + r"postmaster@[123.123.123.123]", + r"postmaster@[IPv6:2001:0db8:85a3:0000:0000:8a2e:0370:7334]", + r"_test@[IPv6:2001:0db8:85a3:0000:0000:8a2e:0370:7334]", + r"I❤️CHOCOLATE@example.com", + r"用户@例子.广告", + r"ಬೆಂಬಲ@ಡೇಟಾಮೇಲ್.ಭಾರತ", + r"अजय@डाटा.भारत", + r"квіточка@пошта.укр", + r"χρήστης@παράδειγμα.ελ", + r"Dörte@Sörensen.example.com", + r"коля@пример.рф", + ), +) +def test_simple_positive_cases(emailstr): + assert validate(emailstr) + + +@pytest.mark.parametrize( + "emailstr", + ( + r"abc.example.com", + r"a@b@c@example.com", + r'a"b(c)d,e:f;gi[j\k]l@example.com', + r'just"not"right@example.com', + r'this is"not\allowed@example.com', + r"this\ still\"not\\allowed@example.com", + r"1234567890123456789012345678901234567890123456789012345678901234+x@example.com", + r"i.like.underscores@but_they_are_not_allowed_in_this_part", + r"trythis@123456789012345678901234567890123456789012345678901234567890123456.com", + r"another@12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234.com", + ), +) +def test_simple_negative_case(emailstr): + assert not validate(emailstr) From 31e291579c5bbf1f51729596b0d850a1f910284f Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 30 Jul 2024 16:40:40 +0000 Subject: [PATCH 15/15] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- tests/unit/formats/test_rfc5321.py | 72 ++++++++++++++--------------- tests/unit/formats/test_rfc6531.py | 74 +++++++++++++++--------------- 2 files changed, 74 insertions(+), 72 deletions(-) diff --git a/tests/unit/formats/test_rfc5321.py b/tests/unit/formats/test_rfc5321.py index 33722d7cf..a9e838330 100644 --- a/tests/unit/formats/test_rfc5321.py +++ b/tests/unit/formats/test_rfc5321.py @@ -6,24 +6,24 @@ @pytest.mark.parametrize( "emailstr", ( - r"simple@example.com", - r"very.common@example.com", - r"FirstName.LastName@EasierReading.org", - r"x@example.com", - r"long.email-address-with-hyphens@and.subdomains.example.com", - r"user.name+tag+sorting@example.com", - r"name/surname@example.com", - r"admin@example", - r"example@s.example", - r'" "@example.org', - r'"john..doe"@example.org', - r"mailhost!username@example.org", - r'"very.(),:;<>[]\".VERY.\"very@\\ \"very\".unusual"@strange.example.com', - r"user%example.com@example.org", - r"user-@example.org", - r"postmaster@[123.123.123.123]", - r"postmaster@[IPv6:2001:0db8:85a3:0000:0000:8a2e:0370:7334]", - r"_test@[IPv6:2001:0db8:85a3:0000:0000:8a2e:0370:7334]", + r"simple@example.com", + r"very.common@example.com", + r"FirstName.LastName@EasierReading.org", + r"x@example.com", + r"long.email-address-with-hyphens@and.subdomains.example.com", + r"user.name+tag+sorting@example.com", + r"name/surname@example.com", + r"admin@example", + r"example@s.example", + r'" "@example.org', + r'"john..doe"@example.org', + r"mailhost!username@example.org", + r'"very.(),:;<>[]\".VERY.\"very@\\ \"very\".unusual"@strange.example.com', + r"user%example.com@example.org", + r"user-@example.org", + r"postmaster@[123.123.123.123]", + r"postmaster@[IPv6:2001:0db8:85a3:0000:0000:8a2e:0370:7334]", + r"_test@[IPv6:2001:0db8:85a3:0000:0000:8a2e:0370:7334]", ), ) def test_simple_positive_cases(emailstr): @@ -33,24 +33,24 @@ def test_simple_positive_cases(emailstr): @pytest.mark.parametrize( "emailstr", ( - r"I❤️CHOCOLATE@example.com", - r"用户@例子.广告", - r"ಬೆಂಬಲ@ಡೇಟಾಮೇಲ್.ಭಾರತ", - r"अजय@डाटा.भारत", - r"квіточка@пошта.укр", - r"χρήστης@παράδειγμα.ελ", - r"Dörte@Sörensen.example.com", - r"коля@пример.рф", - r"abc.example.com", - r"a@b@c@example.com", - r'a"b(c)d,e:f;gi[j\k]l@example.com', - r'just"not"right@example.com', - r'this is"not\allowed@example.com', - r"this\ still\"not\\allowed@example.com", - r"1234567890123456789012345678901234567890123456789012345678901234+x@example.com", - r"i.like.underscores@but_they_are_not_allowed_in_this_part", - r"trythis@123456789012345678901234567890123456789012345678901234567890123456.com", - r"another@12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234.com", + r"I❤️CHOCOLATE@example.com", + r"用户@例子.广告", + r"ಬೆಂಬಲ@ಡೇಟಾಮೇಲ್.ಭಾರತ", + r"अजय@डाटा.भारत", + r"квіточка@пошта.укр", + r"χρήστης@παράδειγμα.ελ", + r"Dörte@Sörensen.example.com", + r"коля@пример.рф", + r"abc.example.com", + r"a@b@c@example.com", + r'a"b(c)d,e:f;gi[j\k]l@example.com', + r'just"not"right@example.com', + r'this is"not\allowed@example.com', + r"this\ still\"not\\allowed@example.com", + r"1234567890123456789012345678901234567890123456789012345678901234+x@example.com", + r"i.like.underscores@but_they_are_not_allowed_in_this_part", + r"trythis@123456789012345678901234567890123456789012345678901234567890123456.com", + r"another@12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234.com", ), ) def test_simple_negative_case(emailstr): diff --git a/tests/unit/formats/test_rfc6531.py b/tests/unit/formats/test_rfc6531.py index 2521d9f8c..68d3e80ab 100644 --- a/tests/unit/formats/test_rfc6531.py +++ b/tests/unit/formats/test_rfc6531.py @@ -6,32 +6,34 @@ @pytest.mark.parametrize( "emailstr", ( - r"simple@example.com", - r"very.common@example.com", - r"FirstName.LastName@EasierReading.org", - r"x@example.com", - r"long.email-address-with-hyphens@and.subdomains.example.com", - r"user.name+tag+sorting@example.com", - r"name/surname@example.com", - r"admin@example", - r"example@s.example", - r'" "@example.org', - r'"john..doe"@example.org', - r"mailhost!username@example.org", - r'"very.(),:;<>[]\".VERY.\"very@\\ \"very\".unusual"@strange.example.com' - r"user%example.com@example.org", - r"user-@example.org", - r"postmaster@[123.123.123.123]", - r"postmaster@[IPv6:2001:0db8:85a3:0000:0000:8a2e:0370:7334]", - r"_test@[IPv6:2001:0db8:85a3:0000:0000:8a2e:0370:7334]", - r"I❤️CHOCOLATE@example.com", - r"用户@例子.广告", - r"ಬೆಂಬಲ@ಡೇಟಾಮೇಲ್.ಭಾರತ", - r"अजय@डाटा.भारत", - r"квіточка@пошта.укр", - r"χρήστης@παράδειγμα.ελ", - r"Dörte@Sörensen.example.com", - r"коля@пример.рф", + r"simple@example.com", + r"very.common@example.com", + r"FirstName.LastName@EasierReading.org", + r"x@example.com", + r"long.email-address-with-hyphens@and.subdomains.example.com", + r"user.name+tag+sorting@example.com", + r"name/surname@example.com", + r"admin@example", + r"example@s.example", + r'" "@example.org', + r'"john..doe"@example.org', + r"mailhost!username@example.org", + ( + r'"very.(),:;<>[]\".VERY.\"very@\\ \"very\".unusual"@strange.example.com' + r"user%example.com@example.org" + ), + r"user-@example.org", + r"postmaster@[123.123.123.123]", + r"postmaster@[IPv6:2001:0db8:85a3:0000:0000:8a2e:0370:7334]", + r"_test@[IPv6:2001:0db8:85a3:0000:0000:8a2e:0370:7334]", + r"I❤️CHOCOLATE@example.com", + r"用户@例子.广告", + r"ಬೆಂಬಲ@ಡೇಟಾಮೇಲ್.ಭಾರತ", + r"अजय@डाटा.भारत", + r"квіточка@пошта.укр", + r"χρήστης@παράδειγμα.ελ", + r"Dörte@Sörensen.example.com", + r"коля@пример.рф", ), ) def test_simple_positive_cases(emailstr): @@ -41,16 +43,16 @@ def test_simple_positive_cases(emailstr): @pytest.mark.parametrize( "emailstr", ( - r"abc.example.com", - r"a@b@c@example.com", - r'a"b(c)d,e:f;gi[j\k]l@example.com', - r'just"not"right@example.com', - r'this is"not\allowed@example.com', - r"this\ still\"not\\allowed@example.com", - r"1234567890123456789012345678901234567890123456789012345678901234+x@example.com", - r"i.like.underscores@but_they_are_not_allowed_in_this_part", - r"trythis@123456789012345678901234567890123456789012345678901234567890123456.com", - r"another@12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234.com", + r"abc.example.com", + r"a@b@c@example.com", + r'a"b(c)d,e:f;gi[j\k]l@example.com', + r'just"not"right@example.com', + r'this is"not\allowed@example.com', + r"this\ still\"not\\allowed@example.com", + r"1234567890123456789012345678901234567890123456789012345678901234+x@example.com", + r"i.like.underscores@but_they_are_not_allowed_in_this_part", + r"trythis@123456789012345678901234567890123456789012345678901234567890123456.com", + r"another@12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234.com", ), ) def test_simple_negative_case(emailstr):