Merge pull request #595 from Crozzers/fix-img-alt-text

nicholasserra · web-flow · commit 1e0fbf2c2b57 · 2024-08-07T16:51:26.000-04:00
Fix img alt text being processed as markdown (#594)
diff --git a/CHANGES.md b/CHANGES.md
@@ -4,6 +4,7 @@
 
 - [pull #590] Fix underscores within bold text getting emphasized (#589)
 - [pull #591] Add Alerts extra
+- [pull #595] Fix img alt text being processed as markdown (#594)
 
 
 ## python-markdown2 2.5.0
diff --git a/lib/markdown2.py b/lib/markdown2.py
@@ -516,8 +516,8 @@ def convert(self, text: str) -> 'UnicodeWithAttrs':
 
         text = self._unescape_special_chars(text)
 
+        text = self._unhash_html_spans(text)
         if self.safe_mode:
-            text = self._unhash_html_spans(text)
             # return the removed text warning to its markdown.py compatible form
             text = text.replace(self.html_removed_text, self.html_removed_text_compat)
 
@@ -1336,24 +1336,19 @@ def _is_comment(token):
                 return
             return re.match(r'(<!--)(.*)(-->)', token)
 
-        def _hash(token):
-            key = _hash_text(token)
-            self.html_spans[key] = token
-            return key
-
         tokens = []
         split_tokens = self._sorta_html_tokenize_re.split(text)
         is_html_markup = False
         for index, token in enumerate(split_tokens):
             if is_html_markup and not _is_auto_link(token) and not _is_code_span(index, token):
                 is_comment = _is_comment(token)
                 if is_comment:
-                    tokens.append(_hash(self._sanitize_html(is_comment.group(1))))
+                    tokens.append(self._hash_span(self._sanitize_html(is_comment.group(1))))
                     # sanitise but leave comment body intact for further markdown processing
                     tokens.append(self._sanitize_html(is_comment.group(2)))
-                    tokens.append(_hash(self._sanitize_html(is_comment.group(3))))
+                    tokens.append(self._hash_span(self._sanitize_html(is_comment.group(3))))
                 else:
-                    tokens.append(_hash(self._sanitize_html(token)))
+                    tokens.append(self._hash_span(self._sanitize_html(token)))
             else:
                 tokens.append(self._encode_incomplete_tags(token))
             is_html_markup = not is_html_markup
@@ -1600,7 +1595,7 @@ def _do_links(self, text: str) -> str:
                         img_class_str = self._html_class_str_from_tag("img")
                         result = '<img src="%s" alt="%s"%s%s%s' \
                             % (self._protect_url(url),
-                               _xml_escape_attr(link_text),
+                               self._hash_span(_xml_escape_attr(link_text)),
                                title_str,
                                img_class_str,
                                self.empty_element_suffix)
@@ -1657,7 +1652,7 @@ def _do_links(self, text: str) -> str:
                             img_class_str = self._html_class_str_from_tag("img")
                             result = '<img src="%s" alt="%s"%s%s%s' \
                                 % (self._protect_url(url),
-                                   _xml_escape_attr(link_text),
+                                   self._hash_span(_xml_escape_attr(link_text)),
                                    title_str,
                                    img_class_str,
                                    self.empty_element_suffix)
@@ -2422,6 +2417,15 @@ def _outdent(self, text: str) -> str:
         # Remove one level of line-leading tabs or spaces
         return self._outdent_re.sub('', text)
 
+    def _hash_span(self, text: str) -> str:
+        '''
+        Wrapper around `_hash_text` that also adds the hash to `self.html_spans`,
+        meaning it will be automatically unhashed during conversion.
+        '''
+        key = _hash_text(text)
+        self.html_spans[key] = text
+        return key
+
     @staticmethod
     def _uniform_outdent(
         text: str,
diff --git a/test/tm-cases/img_alt_text.html b/test/tm-cases/img_alt_text.html
@@ -0,0 +1 @@
+<p><img src="d" alt="a*b*c" /></p>
diff --git a/test/tm-cases/img_alt_text.text b/test/tm-cases/img_alt_text.text
@@ -0,0 +1 @@
+![a*b*c](d)