Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

- [pull #639] Fix middle-word-em interfering with strongs (#637)
- [pull #640] Fix code friendly extra stopping other syntax being processed (#638)
- [pull #644] Fix a number of em/strong issues (#641, #642, #643)


## python-markdown2 2.5.4
Expand Down
42 changes: 38 additions & 4 deletions lib/markdown2.py
Original file line number Diff line number Diff line change
Expand Up @@ -1988,14 +1988,48 @@ def _encode_code(self, text: str) -> str:
self._code_table[text] = hashed
return hashed

# Strong (bold) spans delimited by `**` or `__`.
#   group 1 - the delimiter (`**` or `__`)
#   group 2 - the text between the delimiters
# A run of `*` / `_` directly in front of the delimiter is consumed by the
# match but NOT captured, so a replacement must re-emit
# `match.string[match.start():match.start(1)]` or those characters are lost.
# `re.S` lets the span cross newlines; `re.X` enables the inline comments.
_strong_re = re.compile(r'''
(?:_{1,}|\*{1,})? # ignore any leading em chars because we want to wrap `<strong>` as tightly around the text as possible
# eg: `***abc***` -> `*<strong>abc</strong>*` instead of `<strong>*abc*</strong>`
# Makes subsequent <em> processing easier
(\*\*|__)(?=\S) # strong syntax - must be followed by a non whitespace char
(.+?) # the strong text itself
(?<=\S)\1 # closing syntax - must be preceeded by non whitespace char
''',
    re.S | re.X
)
# Em (italic) spans delimited by a single `*` or `_`.
#   group 1 - the delimiter, group 2 - the enclosed text, which must start
#   and end with a non-whitespace character.
_em_re = re.compile(r"(\*|_)(?=\S)(.*?\S)\1", re.S)

@mark_stage(Stage.ITALIC_AND_BOLD)
def _do_italics_and_bold(self, text: str) -> str:
    """Convert `**strong**` / `__strong__` and `*em*` / `_em_` markers to HTML.

    Strong spans are converted first, then em spans. A candidate span is left
    untouched when converting it would wrap text that straddles the boundary
    of an existing HTML span tag.

    Args:
        text: the text to process.

    Returns:
        The text with the accepted spans wrapped in `<strong>` / `<em>`.
    """
    def sub(match: re.Match) -> str:
        """Replacement callback shared by `_strong_re` and `_em_re`.

        Rejects matches that reach across a closing or opening HTML span
        tag; complete spans nested inside the match are acceptable.
        Rejected matches are returned exactly as they appeared in the input.
        """
        marker: str = match.group(1)
        contents: str = match.group(2)
        # The strong regex also consumes leading em chars ahead of group 1,
        # so the match may cover extra text that has to be emitted again.
        # For the em regex group 1 starts the match and this is empty.
        prefix = match.string[match.start():match.start(1)]
        unchanged = prefix + marker + contents + marker

        # look for all possible span HTML tags inside the candidate text
        for tag in re.findall(rf'</?({self._span_tags})', contents):
            # if it's unbalanced then that violates the rules
            if not self._tag_is_closed(tag, contents):
                return unchanged

            # if it is balanced, but the closing tag is before the opening then
            # the text probably looks like `_</strong>abcdef<strong>_`,
            # which is across 2 spans
            close_index = contents.find(f'</{tag}')
            open_index = contents.find(f'<{tag}')
            if close_index != -1 and close_index < open_index:
                return unchanged

        # two-character markers (`**`, `__`) are strong, single ones are em
        syntax = 'strong' if len(marker) == 2 else 'em'
        return f'{prefix}<{syntax}>{contents}</{syntax}>'

    # <strong> must go first. Both passes go through `sub`; a plain
    # backreference replacement would drop the prefix consumed by
    # `_strong_re` and bypass the cross-span check.
    text = self._strong_re.sub(sub, text)
    text = self._em_re.sub(sub, text)
    return text

_block_quote_base = r'''
Expand Down Expand Up @@ -3320,7 +3354,7 @@ def __init__(self, md: Markdown, options: Union[dict, bool, None]):
self.middle_word_em_re = re.compile(
r'''
(?<!^) # To be middle of a word, it cannot be at the start of the input
(?<![*_\s]) # cannot be preceeded by em character or whitespace (must be in middle of word)
(?<![*_\W]) # cannot be preceeded by em char or non word char (must be in middle of word)
([*_]) # em char
(?=\S) # must be followed by non-whitespace char
(?![*_]|$|\W) # cannot be followed by another em char, EOF or a non-word char
Expand Down
8 changes: 4 additions & 4 deletions test/markdowntest-cases/Strong and em together.html
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
<p><strong><em>This is strong and em.</em></strong></p>
<p><em><strong>This is strong and em.</strong></em></p>

<p>So is <strong><em>this</em></strong> word.</p>
<p>So is <em><strong>this</strong></em> word.</p>

<p><strong><em>This is strong and em.</em></strong></p>
<p><em><strong>This is strong and em.</strong></em></p>

<p>So is <strong><em>this</em></strong> word.</p>
<p>So is <em><strong>this</strong></em> word.</p>
1 change: 1 addition & 0 deletions test/tm-cases/consecutive_strong_and_em.html
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
<p><strong>strong</strong><em>em</em><strong>strong</strong></p>
1 change: 1 addition & 0 deletions test/tm-cases/consecutive_strong_and_em.text
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
**strong***em***strong**
1 change: 1 addition & 0 deletions test/tm-cases/ems_across_spans.html
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
<p><strong>_confusing</strong> ident is <strong>_confusing</strong></p>
1 change: 1 addition & 0 deletions test/tm-cases/ems_across_spans.text
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
**_confusing** ident is **_confusing**
3 changes: 3 additions & 0 deletions test/tm-cases/middle_word_em_issue641.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
<p><strong>Strong</strong> (<em>em</em>)</p>

<p>note:<em>this is good</em>, but <em>this is not</em></p>
1 change: 1 addition & 0 deletions test/tm-cases/middle_word_em_issue641.opts
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{'extras': {'middle-word-em': False}}
3 changes: 3 additions & 0 deletions test/tm-cases/middle_word_em_issue641.text
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
**Strong** (*em*)

note:*this is good*, but *this is not*
2 changes: 1 addition & 1 deletion test/tm-cases/middle_word_em_with_extra_ems.html
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

<p><strong>one_two_three</strong></p>

<p><strong><em>one_two_three</em></strong></p>
<p><em><strong>one_two_three</strong></em></p>

<p><em><strong>one_two_three</strong></em></p>

Expand Down