|
121 | 121 | from abc import ABC, abstractmethod |
122 | 122 | import functools |
123 | 123 | from hashlib import sha256 |
124 | | -from random import randint, random |
| 124 | +from random import random |
125 | 125 | from typing import Any, Callable, Collection, Dict, List, Literal, Optional, Tuple, Type, TypedDict, Union |
126 | 126 | from enum import IntEnum, auto |
| 127 | +from os import urandom |
127 | 128 |
|
128 | 129 | if sys.version_info[1] < 9: |
129 | 130 | from typing import Iterable |
|
144 | 145 | DEFAULT_TAB_WIDTH = 4 |
145 | 146 |
|
146 | 147 |
|
147 | | -SECRET_SALT = bytes(randint(0, 1000000)) |
| 148 | +SECRET_SALT = urandom(16) |
148 | 149 | # MD5 function was previously used for this; the "md5" prefix was kept for |
149 | 150 | # backwards compatibility. |
150 | 151 | def _hash_text(s: str) -> str: |
@@ -1262,8 +1263,13 @@ def _run_span_gamut(self, text: str) -> str: |
1262 | 1263 | (?: |
1263 | 1264 | # tag |
1264 | 1265 | </? |
1265 | | - (?:\w+) # tag name |
1266 | | - (?:\s+(?:[\w-]+:)?[\w-]+=(?:".*?"|'.*?'))* # attributes |
| 1266 | + (?:\w+) # tag name |
| 1267 | + (?: # attributes |
| 1268 | + \s+ # whitespace after tag |
| 1269 | + (?:[^\t<>"'=/]+:)? |
| 1270 | + [^<>"'=/]+= # attr name |
| 1271 | + (?:".*?"|'.*?'|[^<>"'=/\s]+) # value, quoted or unquoted. If unquoted, no spaces allowed |
| 1272 | + )* |
1267 | 1273 | \s*/?> |
1268 | 1274 | | |
1269 | 1275 | # auto-link (e.g., <http://www.activestate.com/>) |
@@ -1356,9 +1362,23 @@ def _is_comment(token): |
1356 | 1362 | is_html_markup = not is_html_markup |
1357 | 1363 | return ''.join(tokens) |
1358 | 1364 |
|
1359 | | - def _unhash_html_spans(self, text: str) -> str: |
1360 | | - for key, sanitized in list(self.html_spans.items()): |
1361 | | - text = text.replace(key, sanitized) |
def _unhash_html_spans(self, text: str, spans=True, code=False) -> str:
    '''
    Repeatedly unhash a block of text until no placeholder is left to expand.

    A replacement may itself contain another placeholder (for example a
    code block whose content holds a hashed HTML span), so a single pass
    is not always enough; passes are repeated until the text stops
    changing.

    Args:
        text: the text containing hashed placeholders
        spans: unhash anything from `self.html_spans`
        code: unhash code blocks (entries of `self._code_table`)

    Returns:
        The text with every enabled placeholder expanded.
    '''
    if not text:
        return text

    # Without a cycle, nesting cannot be deeper than the number of table
    # entries, so this many passes always reaches the fixed point. The cap
    # only matters if a replacement somehow contains its own key, where an
    # unbounded loop would never terminate.
    max_passes = len(self.html_spans) + len(self._code_table) + 1

    for _ in range(max_passes):
        # Snapshot BEFORE replacing, so a pass that changed something
        # triggers another pass.
        before = text
        if spans:
            for key, sanitized in self.html_spans.items():
                text = text.replace(key, sanitized)
        if code:
            # `_code_table` maps original code -> hash key. The loop
            # variable is deliberately not called `code`, which would
            # shadow the boolean flag on later passes.
            for original, key in self._code_table.items():
                text = text.replace(key, original)
        if text == before:
            break
    return text
1363 | 1383 |
|
1364 | 1384 | def _sanitize_html(self, s: str) -> str: |
@@ -1584,8 +1604,9 @@ def _do_links(self, text: str) -> str: |
1584 | 1604 |
|
1585 | 1605 | # We've got to encode these to avoid conflicting |
1586 | 1606 | # with italics/bold. |
1587 | | - url = url.replace('*', self._escape_table['*']) \ |
1588 | | - .replace('_', self._escape_table['_']) |
| 1607 | + url = self._unhash_html_spans(url, code=True) \ |
| 1608 | + .replace('*', self._escape_table['*']) \ |
| 1609 | + .replace('_', self._escape_table['_']) |
1589 | 1610 | if title: |
1590 | 1611 | title_str = ' title="%s"' % ( |
1591 | 1612 | _xml_escape_attr(title) |
|
0 commit comments