diff --git a/CHANGES.md b/CHANGES.md
index 4283b966..f07fb095 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -5,6 +5,7 @@
 - [pull #590] Fix underscores within bold text getting emphasized (#589)
 - [pull #591] Add Alerts extra
 - [pull #595] Fix img alt text being processed as markdown (#594)
+- [pull #604] Fix XSS injection in image URLs (#603)
 
 
 ## python-markdown2 2.5.0
diff --git a/lib/markdown2.py b/lib/markdown2.py
index d0f10eea..26e5b075 100755
--- a/lib/markdown2.py
+++ b/lib/markdown2.py
@@ -1354,9 +1354,26 @@ def _is_comment(token):
             is_html_markup = not is_html_markup
         return ''.join(tokens)
 
-    def _unhash_html_spans(self, text: str) -> str:
-        for key, sanitized in list(self.html_spans.items()):
-            text = text.replace(key, sanitized)
+    def _unhash_html_spans(self, text: str, spans=True, code=False) -> str:
+        '''
+        Recursively unhash a block of text
+
+        Args:
+            spans: unhash anything from `self.html_spans`
+            code: unhash code blocks
+        '''
+        orig = ''
+        while text != orig:
+            # Snapshot before replacing so the loop keeps going until a full
+            # pass changes nothing; this is what makes the unhashing recursive.
+            orig = text
+            if spans:
+                for key, sanitized in list(self.html_spans.items()):
+                    text = text.replace(key, sanitized)
+            if code:
+                # Keep the loop variable distinct from the `code` flag.
+                for code_text, key in list(self._code_table.items()):
+                    text = text.replace(key, code_text)
         return text
 
     def _sanitize_html(self, s: str) -> str:
@@ -1582,8 +1599,9 @@ def _do_links(self, text: str) -> str:
 
                     # We've got to encode these to avoid conflicting
                     # with italics/bold.
-                    url = url.replace('*', self._escape_table['*']) \
-                             .replace('_', self._escape_table['_'])
+                    url = self._unhash_html_spans(url, code=True) \
+                        .replace('*', self._escape_table['*']) \
+                        .replace('_', self._escape_table['_'])
                     if title:
                         title_str = ' title="%s"' % (
                             _xml_escape_attr(title)
diff --git a/test/tm-cases/issue603_xss.html b/test/tm-cases/issue603_xss.html
new file mode 100644
index 00000000..2293d586
--- /dev/null
+++ b/test/tm-cases/issue603_xss.html
@@ -0,0 +1,6 @@
+
" onerror=alert()//