Merge branch 'master' into gfm-link-refs

nicholasserra · web-flow · commit d49f7986e39b · 2024-09-23T15:04:18.000-04:00
diff --git a/CHANGES.md b/CHANGES.md
@@ -6,6 +6,9 @@
 - [pull #591] Add Alerts extra
 - [pull #595] Fix img alt text being processed as markdown (#594)
 - [pull #598] Add `link-shortrefs` extra (#597)
+- [pull #600] Use urandom for SECRET_SALT
+- [pull #602] Fix XSS issue in safe mode (#601)
+- [pull #604] Fix XSS injection in image URLs (#603)
 
 
 ## python-markdown2 2.5.0
diff --git a/CONTRIBUTORS.txt b/CONTRIBUTORS.txt
@@ -60,3 +60,4 @@ Kishore (github.com/jk6521)
 Ircama (github.com/Ircama)
 Ankit Mahato (github.com/animator)
 Eric Dufresne (github.com/edufresne)
+Lyra Rebane (github.com/rebane2001)
diff --git a/lib/markdown2.py b/lib/markdown2.py
@@ -121,9 +121,10 @@
 from abc import ABC, abstractmethod
 import functools
 from hashlib import sha256
-from random import randint, random
+from random import random
 from typing import Any, Callable, Collection, Dict, List, Literal, Optional, Tuple, Type, TypedDict, Union
 from enum import IntEnum, auto
+from os import urandom
 
 if sys.version_info[1] < 9:
     from typing import Iterable
@@ -144,7 +145,7 @@
 DEFAULT_TAB_WIDTH = 4
 
 
-SECRET_SALT = bytes(randint(0, 1000000))
+SECRET_SALT = urandom(16)
 # MD5 function was previously used for this; the "md5" prefix was kept for
 # backwards compatibility.
 def _hash_text(s: str) -> str:
@@ -1262,8 +1263,13 @@ def _run_span_gamut(self, text: str) -> str:
             (?:
                 # tag
                 </?
-                (?:\w+)                                     # tag name
-                (?:\s+(?:[\w-]+:)?[\w-]+=(?:".*?"|'.*?'))*  # attributes
+                (?:\w+)         # tag name
+                (?:             # attributes
+                    \s+                           # whitespace after tag
+                    (?:[^\t<>"'=/]+:)?
+                    [^<>"'=/]+=                   # attr name
+                    (?:".*?"|'.*?'|[^<>"'=/\s]+)  # value, quoted or unquoted. If unquoted, no spaces allowed
+                )*
                 \s*/?>
                 |
                 # auto-link (e.g., <http://www.activestate.com/>)
@@ -1356,9 +1362,23 @@ def _is_comment(token):
             is_html_markup = not is_html_markup
         return ''.join(tokens)
 
-    def _unhash_html_spans(self, text: str) -> str:
-        for key, sanitized in list(self.html_spans.items()):
-            text = text.replace(key, sanitized)
+    def _unhash_html_spans(self, text: str, spans=True, code=False) -> str:
+        '''
+        Recursively unhash a block of text
+
+        Args:
+            spans: unhash anything from `self.html_spans`
+            code: unhash code blocks
+        '''
+        orig = ''
+        while text != orig:
+            if spans:
+                for key, sanitized in list(self.html_spans.items()):
+                    text = text.replace(key, sanitized)
+            if code:
+                for code, key in list(self._code_table.items()):
+                    text = text.replace(key, code)
+            orig = text
         return text
 
     def _sanitize_html(self, s: str) -> str:
@@ -1584,8 +1604,9 @@ def _do_links(self, text: str) -> str:
 
                     # We've got to encode these to avoid conflicting
                     # with italics/bold.
-                    url = url.replace('*', self._escape_table['*']) \
-                             .replace('_', self._escape_table['_'])
+                    url = self._unhash_html_spans(url, code=True) \
+                              .replace('*', self._escape_table['*']) \
+                              .replace('_', self._escape_table['_'])
                     if title:
                         title_str = ' title="%s"' % (
                             _xml_escape_attr(title)
diff --git a/test/tm-cases/issue601_xss.html b/test/tm-cases/issue601_xss.html
@@ -0,0 +1 @@
+<p>&lt;img src=# onerror="alert()"&gt;&lt;/p&gt;</p>
diff --git a/test/tm-cases/issue601_xss.opts b/test/tm-cases/issue601_xss.opts
@@ -0,0 +1 @@
+{"safe_mode": "escape"}
diff --git a/test/tm-cases/issue601_xss.text b/test/tm-cases/issue601_xss.text
@@ -0,0 +1 @@
+<img src=# onerror="alert()"></p>
diff --git a/test/tm-cases/issue603_xss.html b/test/tm-cases/issue603_xss.html
@@ -0,0 +1,6 @@
+<p><img src="code&gt;&quot; onerror=alert()//&lt;/code" alt="" /></p>
+
+<p><img src="&quot; onerror=alert()//" alt="" />
+<a href="#"></a>
+<img src="`&quot; onerror=alert()//`" alt="" />
+<img src="&lt;code&gt;&quot; onerror=alert()//&lt;code&gt;" alt="" /></p>
diff --git a/test/tm-cases/issue603_xss.opts b/test/tm-cases/issue603_xss.opts
@@ -0,0 +1 @@
+{"safe_mode": "escape"}
diff --git a/test/tm-cases/issue603_xss.text b/test/tm-cases/issue603_xss.text
@@ -0,0 +1,12 @@
+![](`" onerror=alert()//`)
+
+
+![][XSS]
+[][XSS]
+![][XSS2]
+![][XSS3]
+
+
+[XSS]: " onerror=alert()//
+[XSS2]: `" onerror=alert()//`
+[XSS3]: <code>" onerror=alert()//<code>

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1 @@`
	`1`	`+<p>&lt;img src=# onerror="alert()"&gt;&lt;/p&gt;</p>`