From 64af5997aaefa98599eb5ad73f08438083ab0767 Mon Sep 17 00:00:00 2001
From: Crozzers <captaincrozzers@gmail.com>
Date: Sat, 4 Oct 2025 10:51:18 +0100
Subject: [PATCH 1/5] Fix #641

---
 lib/markdown2.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/lib/markdown2.py b/lib/markdown2.py
index 488b24cc..01dce483 100755
--- a/lib/markdown2.py
+++ b/lib/markdown2.py
@@ -3320,7 +3320,7 @@ def __init__(self, md: Markdown, options: Union[dict, bool, None]):
         self.middle_word_em_re = re.compile(
             r'''
             (?<!^)         # To be middle of a word, it cannot be at the start of the input
-            (?<![*_\s])    # cannot be preceeded by em character or whitespace (must be in middle of word)
+            (?<![*_\W])    # cannot be preceded by em char or non-word char (must be in middle of word)
             ([*_])         # em char
             (?=\S)         # must be followed by non-whitespace char
             (?![*_]|$|\W)  # cannot be followed by another em char, EOF or a non-word char

From 4fb2fa13f086e4e006062571c4727cd8c6b969a8 Mon Sep 17 00:00:00 2001
From: Crozzers <captaincrozzers@gmail.com>
Date: Sat, 4 Oct 2025 11:55:32 +0100
Subject: [PATCH 2/5] Fix #642

---
 lib/markdown2.py                    | 27 +++++++++++++++++++++++++--
 test/tm-cases/ems_across_spans.html |  1 +
 test/tm-cases/ems_across_spans.text |  1 +
 3 files changed, 27 insertions(+), 2 deletions(-)
 create mode 100644 test/tm-cases/ems_across_spans.html
 create mode 100644 test/tm-cases/ems_across_spans.text

diff --git a/lib/markdown2.py b/lib/markdown2.py
index 01dce483..dde8f704 100755
--- a/lib/markdown2.py
+++ b/lib/markdown2.py
@@ -1993,9 +1993,32 @@ def _encode_code(self, text: str) -> str:
 
     @mark_stage(Stage.ITALIC_AND_BOLD)
     def _do_italics_and_bold(self, text: str) -> str:
+        def sub(match: re.Match):
+            '''
+            regex sub function that checks that the match isn't matching across spans.
+            The span shouldn't be across a closing or opening HTML tag, although spans within
+            the span is acceptable.
+            '''
+            contents: str = match.group(2)
+            # look for all possible span HTML tags
+            for tag in re.findall(rf'</?({self._span_tags})', contents):
+                # if it's unbalanced then that violates the rules
+                if not self._tag_is_closed(tag, contents):
+                    return match.group(1) + contents + match.group(2)
+
+                # if it is balanced, but the closing tag is before the opening then
+                # the text probably looks like `_</strong>abcdef<strong>_`, which is across 2 spans
+                close_index = contents.find(f'</{tag}')
+                open_index = contents.find(f'<{tag}')
+                if close_index != -1 and close_index < open_index:
+                    return match.group(1) + contents + match.group(1)
+
+            syntax = 'strong' if len(match.group(1)) == 2 else 'em'
+            return f'<{syntax}>{contents}</{syntax}>'
+
         # <strong> must go first:
-        text = self._strong_re.sub(r"<strong>\2</strong>", text)
-        text = self._em_re.sub(r"<em>\2</em>", text)
+        text = self._strong_re.sub(sub, text)
+        text = self._em_re.sub(sub, text)
         return text
 
     _block_quote_base = r'''
diff --git a/test/tm-cases/ems_across_spans.html b/test/tm-cases/ems_across_spans.html
new file mode 100644
index 00000000..daef521f
--- /dev/null
+++ b/test/tm-cases/ems_across_spans.html
@@ -0,0 +1 @@
+<p><strong>_confusing</strong> ident is <strong>_confusing</strong></p>
diff --git a/test/tm-cases/ems_across_spans.text b/test/tm-cases/ems_across_spans.text
new file mode 100644
index 00000000..40cd465c
--- /dev/null
+++ b/test/tm-cases/ems_across_spans.text
@@ -0,0 +1 @@
+**_confusing** ident is **_confusing**
\ No newline at end of file

From 9a48294af4c0c5c794ea77907f700c3f67085fe8 Mon Sep 17 00:00:00 2001
From: Crozzers <captaincrozzers@gmail.com>
Date: Sun, 5 Oct 2025 16:46:36 +0100
Subject: [PATCH 3/5] Fix #643

---
 lib/markdown2.py                              | 19 +++++++++++++++----
 .../Strong and em together.html               |  8 ++++----
 test/tm-cases/consecutive_strong_and_em.html  |  1 +
 test/tm-cases/consecutive_strong_and_em.text  |  1 +
 test/tm-cases/middle_word_em_issue641.html    |  3 +++
 test/tm-cases/middle_word_em_issue641.opts    |  1 +
 test/tm-cases/middle_word_em_issue641.text    |  3 +++
 .../middle_word_em_with_extra_ems.html        |  2 +-
 8 files changed, 29 insertions(+), 9 deletions(-)
 create mode 100644 test/tm-cases/consecutive_strong_and_em.html
 create mode 100644 test/tm-cases/consecutive_strong_and_em.text
 create mode 100644 test/tm-cases/middle_word_em_issue641.html
 create mode 100644 test/tm-cases/middle_word_em_issue641.opts
 create mode 100644 test/tm-cases/middle_word_em_issue641.text

diff --git a/lib/markdown2.py b/lib/markdown2.py
index dde8f704..bd958c5d 100755
--- a/lib/markdown2.py
+++ b/lib/markdown2.py
@@ -1988,7 +1988,16 @@ def _encode_code(self, text: str) -> str:
         self._code_table[text] = hashed
         return hashed
 
-    _strong_re = re.compile(r"(\*\*|__)(?=\S)(.+?[*_]?)(?<=\S)\1", re.S)
+    _strong_re = re.compile(r'''
+        [*_]*            # ignore any leading em chars because we want to wrap `<strong>` as tightly around the text as possible
+                         # eg: `***abc***` -> `*<strong>abc</strong>*` instead of `<strong>*abc*</strong>`
+                         # Makes subsequent <em> processing easier
+        (\*\*|__)(?=\S)  # strong syntax - must be followed by a non whitespace char
+        (.+?)            # the strong text itself
+        (?<=\S)\1        # closing syntax - must be preceeded by non whitespace char
+        ''',
+        re.S | re.X
+    )
     _em_re = re.compile(r"(\*|_)(?=\S)(.*?\S)\1", re.S)
 
     @mark_stage(Stage.ITALIC_AND_BOLD)
@@ -2000,21 +2009,23 @@ def sub(match: re.Match):
             the span is acceptable.
             '''
             contents: str = match.group(2)
+            # the strong re also checks for leading em chars, so the match may cover some additional text
+            prefix = match.string[match.start(): match.regs[1][0]]
             # look for all possible span HTML tags
             for tag in re.findall(rf'</?({self._span_tags})', contents):
                 # if it's unbalanced then that violates the rules
                 if not self._tag_is_closed(tag, contents):
-                    return match.group(1) + contents + match.group(2)
+                    return prefix + match.group(1) + contents + match.group(1)
 
                 # if it is balanced, but the closing tag is before the opening then
                 # the text probably looks like `_</strong>abcdef<strong>_`, which is across 2 spans
                 close_index = contents.find(f'</{tag}')
                 open_index = contents.find(f'<{tag}')
                 if close_index != -1 and close_index < open_index:
-                    return match.group(1) + contents + match.group(1)
+                    return prefix + match.group(1) + contents + match.group(1)
 
             syntax = 'strong' if len(match.group(1)) == 2 else 'em'
-            return f'<{syntax}>{contents}</{syntax}>'
+            return f'{prefix}<{syntax}>{contents}</{syntax}>'
 
         # <strong> must go first:
         text = self._strong_re.sub(sub, text)
diff --git a/test/markdowntest-cases/Strong and em together.html b/test/markdowntest-cases/Strong and em together.html
index 71ec78c7..bab1b98f 100644
--- a/test/markdowntest-cases/Strong and em together.html	
+++ b/test/markdowntest-cases/Strong and em together.html	
@@ -1,7 +1,7 @@
-<p><strong><em>This is strong and em.</em></strong></p>
+<p><em><strong>This is strong and em.</strong></em></p>
 
-<p>So is <strong><em>this</em></strong> word.</p>
+<p>So is <em><strong>this</strong></em> word.</p>
 
-<p><strong><em>This is strong and em.</em></strong></p>
+<p><em><strong>This is strong and em.</strong></em></p>
 
-<p>So is <strong><em>this</em></strong> word.</p>
+<p>So is <em><strong>this</strong></em> word.</p>
diff --git a/test/tm-cases/consecutive_strong_and_em.html b/test/tm-cases/consecutive_strong_and_em.html
new file mode 100644
index 00000000..6478dd07
--- /dev/null
+++ b/test/tm-cases/consecutive_strong_and_em.html
@@ -0,0 +1 @@
+<p><strong>strong</strong><em>em</em><strong>strong</strong></p>
diff --git a/test/tm-cases/consecutive_strong_and_em.text b/test/tm-cases/consecutive_strong_and_em.text
new file mode 100644
index 00000000..663723f9
--- /dev/null
+++ b/test/tm-cases/consecutive_strong_and_em.text
@@ -0,0 +1 @@
+**strong***em***strong**
diff --git a/test/tm-cases/middle_word_em_issue641.html b/test/tm-cases/middle_word_em_issue641.html
new file mode 100644
index 00000000..39886631
--- /dev/null
+++ b/test/tm-cases/middle_word_em_issue641.html
@@ -0,0 +1,3 @@
+<p><strong>Strong</strong> (<em>em</em>)</p>
+
+<p>note:<em>this is good</em>, but <em>this is not</em></p>
diff --git a/test/tm-cases/middle_word_em_issue641.opts b/test/tm-cases/middle_word_em_issue641.opts
new file mode 100644
index 00000000..f1455c41
--- /dev/null
+++ b/test/tm-cases/middle_word_em_issue641.opts
@@ -0,0 +1 @@
+{'extras': {'middle-word-em': False}}
\ No newline at end of file
diff --git a/test/tm-cases/middle_word_em_issue641.text b/test/tm-cases/middle_word_em_issue641.text
new file mode 100644
index 00000000..b14e5d28
--- /dev/null
+++ b/test/tm-cases/middle_word_em_issue641.text
@@ -0,0 +1,3 @@
+**Strong** (*em*)
+
+note:*this is good*, but *this is not*
\ No newline at end of file
diff --git a/test/tm-cases/middle_word_em_with_extra_ems.html b/test/tm-cases/middle_word_em_with_extra_ems.html
index a86b1932..a8974039 100644
--- a/test/tm-cases/middle_word_em_with_extra_ems.html
+++ b/test/tm-cases/middle_word_em_with_extra_ems.html
@@ -2,7 +2,7 @@
 
 <p><strong>one_two_three</strong></p>
 
-<p><strong><em>one_two_three</em></strong></p>
+<p><em><strong>one_two_three</strong></em></p>
 
 <p><em><strong>one_two_three</strong></em></p>
 

From 3173942d11688c0682570cd7907a095b5259114f Mon Sep 17 00:00:00 2001
From: Crozzers <captaincrozzers@gmail.com>
Date: Sun, 5 Oct 2025 17:54:22 +0100
Subject: [PATCH 4/5] Fix ReDoS regression

---
 lib/markdown2.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/lib/markdown2.py b/lib/markdown2.py
index bd958c5d..8b99ec3d 100755
--- a/lib/markdown2.py
+++ b/lib/markdown2.py
@@ -1989,12 +1989,12 @@ def _encode_code(self, text: str) -> str:
         return hashed
 
     _strong_re = re.compile(r'''
-        [*_]*            # ignore any leading em chars because we want to wrap `<strong>` as tightly around the text as possible
-                         # eg: `***abc***` -> `*<strong>abc</strong>*` instead of `<strong>*abc*</strong>`
-                         # Makes subsequent <em> processing easier
-        (\*\*|__)(?=\S)  # strong syntax - must be followed by a non whitespace char
-        (.+?)            # the strong text itself
-        (?<=\S)\1        # closing syntax - must be preceeded by non whitespace char
+        (?:_{1,}|\*{1,})?  # ignore any leading em chars because we want to wrap `<strong>` as tightly around the text as possible
+                           # eg: `***abc***` -> `*<strong>abc</strong>*` instead of `<strong>*abc*</strong>`
+                           # Makes subsequent <em> processing easier
+        (\*\*|__)(?=\S)    # strong syntax - must be followed by a non-whitespace char
+        (.+?)              # the strong text itself
+        (?<=\S)\1          # closing syntax - must be preceded by non-whitespace char
         ''',
         re.S | re.X
     )

From 40bd17f43735609858779c57e7f12a29991f3a2c Mon Sep 17 00:00:00 2001
From: Crozzers <captaincrozzers@gmail.com>
Date: Sun, 5 Oct 2025 17:56:55 +0100
Subject: [PATCH 5/5] update changelog

---
 CHANGES.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CHANGES.md b/CHANGES.md
index 11db62d5..60c03486 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -4,6 +4,7 @@
 
 - [pull #639] Fix middle-word-em interfering with strongs (#637)
 - [pull #640] Fix code friendly extra stopping other syntax being processed (#638)
+- [pull #644] Fix a number of em/strong issues (#641, #642, #643)
 
 
 ## python-markdown2 2.5.4