Skip to content

Commit 398dc58

Browse files
committed
Add function for replacing nonprinting (control) characters with ^ and M+ notation.
1 parent fb5f401 commit 398dc58

File tree

2 files changed

+61
-1
lines changed

2 files changed

+61
-1
lines changed

domdf_python_tools/utils.py

Lines changed: 45 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@
3030
3131
"""
3232
#
33-
# Copyright © 2018-2020 Dominic Davis-Foster <[email protected]>
33+
# Copyright © 2018-2022 Dominic Davis-Foster <[email protected]>
3434
#
3535
# Permission is hereby granted, free of charge, to any person obtaining a copy
3636
# of this software and associated documentation files (the "Software"), to deal
@@ -84,6 +84,7 @@
8484
List,
8585
Optional,
8686
Pattern,
87+
Set,
8788
Tuple,
8889
TypeVar,
8990
Union,
@@ -128,6 +129,7 @@
128129
"divide",
129130
"redivide",
130131
"unique_sorted",
132+
"replace_nonprinting",
131133
]
132134

133135
#: The current major python version.
@@ -346,6 +348,13 @@ def posargs2kwargs(
346348
elif callable(posarg_names):
347349
posarg_names = inspect.getfullargspec(posarg_names).args
348350

351+
for name, arg_value in zip(posarg_names, args):
352+
if name in kwargs:
353+
if isinstance(posarg_names, MethodType):
354+
raise TypeError(f"{posarg_names.__name__}(): got multiple values for argument '{name}'")
355+
else:
356+
raise TypeError(f"got multiple values for argument '{name}'")
357+
349358
kwargs.update(zip(posarg_names, args))
350359

351360
if self_arg is not None and self_arg in kwargs:
@@ -613,3 +622,38 @@ def unique_sorted(
613622
"""
614623

615624
return sorted(set(elements), key=key, reverse=reverse)
625+
626+
627+
def replace_nonprinting(string: str, exclude: Optional[Set[int]] = None) -> str:
    """
    Replace nonprinting (control) characters in ``string`` with ``^`` and ``M+`` notation.

    * Codepoints 0-31 become ``^`` followed by the character 64 codepoints higher
      (e.g. tab, codepoint 9, becomes ``^I``).
    * Codepoint 127 (DEL) becomes ``^?``.
    * Codepoints 128-255 become ``M+`` followed by the character 64 codepoints lower
      (e.g. codepoint 129 becomes ``M+A``).

    All other characters are left untouched.

    .. versionadded:: 3.3.0

    :param string: The string to process.
    :param exclude: A set of codepoints to leave as-is rather than replace.

    :rtype:

    .. seealso:: :wikipedia:`C0 and C1 control codes` on Wikipedia
    """

    # https://stackoverflow.com/a/44952259

    skipped = set() if exclude is None else exclude

    # Low control range: caret notation.
    replacements = {cp: f"^{chr(cp + 64)}" for cp in range(32) if cp not in skipped}

    # DEL has no "letter" 64 codepoints above it, so it gets its own spelling.
    if 127 not in skipped:
        replacements[127] = "^?"

    # High range: "M+" prefix, shifted down by 64.
    replacements.update({cp: f"M+{chr(cp - 64)}" for cp in range(128, 256) if cp not in skipped})

    # str.translate accepts a mapping of codepoint -> replacement string.
    return string.translate(replacements)

tests/test_utils.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
pyversion,
3737
redirect_output,
3838
redivide,
39+
replace_nonprinting,
3940
stderr_writer,
4041
str2tuple,
4142
strtobool,
@@ -527,3 +528,18 @@ def test_redivide_errors():
527528
)
528529
def test_unique_sorted(values, expected):
529530
assert unique_sorted(values) == expected
531+
532+
533+
@pytest.mark.parametrize(
    "the_string, expected",
    [
        ("\t\t\t", "^I^I^I"),
        ("\u0000\u0000\u0000", "^@^@^@"),
        ("\r\n", "^M^J"),
        ("\b\u000b", "^H^K"),
        # "^?" is only produced for DEL (U+007F), so it must be in the input.
        ("\u001a\u007f", "^Z^?"),
        ('\x81', "M+A"),
    ]
)
def test_replace_nonprinting(the_string: str, expected: str):
    """
    Check that control characters are rewritten in ``^`` / ``M+`` notation.
    """

    assert replace_nonprinting(the_string) == expected

0 commit comments

Comments
 (0)