44import six
55
66
# General-purpose regex patterns
re_convert_heading = re.compile(r'convert_h(\d+)')
re_line_with_content = re.compile(r'^(.*)', flags=re.MULTILINE)
re_whitespace = re.compile(r'[\t ]+')
re_all_whitespace = re.compile(r'[\t \r\n]+')
re_newline_whitespace = re.compile(r'[\t \r\n]*[\r\n][\t \r\n]*')
re_html_heading = re.compile(r'h(\d+)')

# Pattern for creating convert_<tag> function names from tag names
# (matches the characters '[', ']', ':' and '-', which cannot appear in a
# Python identifier)
re_make_convert_fn_name = re.compile(r'[\[\]:-]')

# Extract (leading_nl, content, trailing_nl) from a string
# (functionally equivalent to r'^(\n*)(.*?)(\n*)$', but greedy is faster than reluctant here)
re_extract_newlines = re.compile(r'^(\n*)((?:.*[^\n])?)(\n*)$', flags=re.DOTALL)

# Escape miscellaneous special Markdown characters
# (group 1 captures a single one of: ] \ & < ` [ > ~ = + |)
re_escape_misc_chars = re.compile(r'([]\\&<`[>~=+|])')

# Escape sequence of one or more consecutive '-', preceded
# and followed by whitespace or start/end of fragment, as it
# might be confused with an underline of a header, or with a
# list marker
# (group 1 is the leading whitespace/start, group 2 is the dash run plus
# its trailing whitespace/end)
re_escape_misc_dash_sequences = re.compile(r'(\s|^)(-+(?:\s|$))')

# Escape sequence of up to six consecutive '#', preceded
# and followed by whitespace or start/end of fragment, as
# it might be confused with an ATX heading
# (group 1 is the leading whitespace/start, group 2 is the hash run plus
# its trailing whitespace/end)
re_escape_misc_hashes = re.compile(r'(\s|^)(#{1,6}(?:\s|$))')

# Escape '.' or ')' preceded by up to nine digits, as it might be
# confused with a list item
# (group 1 is the leading whitespace/start plus the digits, group 2 is the
# delimiter plus its trailing whitespace/end)
re_escape_misc_list_items = re.compile(r'((?:\s|^)[0-9]{1,9})([.)](?:\s|$))')
1739
1840# Heading styles
1941ATX = 'atx'
@@ -346,7 +368,7 @@ def get_conv_fn(self, tag_name):
346368 return lambda el , text , parent_tags : self ._convert_hn (n , el , text , parent_tags )
347369
348370 # For other tags, look up their conversion function by tag name
349- convert_fn_name = "convert_%s" % re .sub (r"[\[\]:-]" , "_" , tag_name )
371+ convert_fn_name = "convert_%s" % re_make_convert_fn_name .sub ('_' , tag_name )
350372 convert_fn = getattr (self , convert_fn_name , None )
351373 return convert_fn
352374
@@ -365,20 +387,11 @@ def escape(self, text, parent_tags):
365387 if not text :
366388 return ''
367389 if self .options ['escape_misc' ]:
368- text = re .sub (r'([]\\&<`[>~=+|])' , r'\\\1' , text )
369- # A sequence of one or more consecutive '-', preceded and
370- # followed by whitespace or start/end of fragment, might
371- # be confused with an underline of a header, or with a
372- # list marker.
373- text = re .sub (r'(\s|^)(-+(?:\s|$))' , r'\1\\\2' , text )
374- # A sequence of up to six consecutive '#', preceded and
375- # followed by whitespace or start/end of fragment, might
376- # be confused with an ATX heading.
377- text = re .sub (r'(\s|^)(#{1,6}(?:\s|$))' , r'\1\\\2' , text )
378- # '.' or ')' preceded by up to nine digits might be
379- # confused with a list item.
380- text = re .sub (r'((?:\s|^)[0-9]{1,9})([.)](?:\s|$))' , r'\1\\\2' ,
381- text )
390+ text = re_escape_misc_chars .sub (r'\\\1' , text )
391+ text = re_escape_misc_dash_sequences .sub (r'\1\\\2' , text )
392+ text = re_escape_misc_hashes .sub (r'\1\\\2' , text )
393+ text = re_escape_misc_list_items .sub (r'\1\\\2' , text )
394+
382395 if self .options ['escape_asterisks' ]:
383396 text = text .replace ('*' , r'\*' )
384397 if self .options ['escape_underscores' ]:
0 commit comments