6
6
convert_heading_re = re .compile (r'convert_h(\d+)' )
7
7
line_beginning_re = re .compile (r'^' , re .MULTILINE )
8
8
whitespace_re = re .compile (r'[\r\n\s\t ]+' )
9
+ html_heading_re = re .compile (r'h[1-6]' )
9
10
10
11
11
12
# Heading styles
@@ -61,22 +62,28 @@ def __init__(self, **options):
61
62
62
63
def convert(self, html):
    """Convert an HTML string to Markdown text.

    The markup is parsed with BeautifulSoup's ``html.parser`` backend and
    the parsed document's children are rendered through ``process_tag``.

    :param html: HTML markup to convert.
    :return: whatever ``process_tag`` produces for the document's children.
    """
    soup = BeautifulSoup(html, 'html.parser')
    # The soup object is only a container, so just its children are
    # rendered (children_only=True) and rendering starts in block mode.
    return self.process_tag(soup, convert_as_inline=False, children_only=True)
65
66
66
def process_tag(self, node, convert_as_inline, children_only=False):
    """Recursively render ``node`` and its descendants.

    Children are rendered first; unless ``children_only`` is set, the
    joined child text is then handed to the ``convert_<tagname>`` method
    for ``node`` (when one exists and ``should_convert_tag`` allows it).

    :param node: parsed element whose ``children`` are iterated.
    :param convert_as_inline: passed on to this node's converter; also
        inherited by the children.
    :param children_only: when true, ``node`` itself is not converted and
        only the rendered children are returned.
    :return: the rendered text.
    """
    # Markdown headings can't include block elements (elements w/ newlines),
    # so everything nested inside a heading tag is rendered inline.
    convert_children_as_inline = convert_as_inline
    if not children_only and html_heading_re.match(node.name) is not None:
        convert_children_as_inline = True

    # Convert the children first, collecting the pieces and joining once.
    pieces = []
    for el in node.children:
        if isinstance(el, NavigableString):
            pieces.append(self.process_text(six.text_type(el)))
        else:
            pieces.append(self.process_tag(el, convert_children_as_inline))
    text = ''.join(pieces)

    if not children_only:
        convert_fn = getattr(self, 'convert_%s' % node.name, None)
        if convert_fn and self.should_convert_tag(node.name):
            text = convert_fn(node, text, convert_as_inline)

    return text
82
89
@@ -89,8 +96,8 @@ def __getattr__(self, attr):
89
96
if m :
90
97
n = int (m .group (1 ))
91
98
92
- def convert_tag (el , text ):
93
- return self .convert_hn (n , el , text )
99
+ def convert_tag (el , text , convert_as_inline ):
100
+ return self .convert_hn (n , el , text , convert_as_inline )
94
101
95
102
convert_tag .__name__ = 'convert_h%s' % n
96
103
setattr (self , convert_tag .__name__ , convert_tag )
@@ -116,10 +123,12 @@ def underline(self, text, pad_char):
116
123
text = (text or '' ).rstrip ()
117
124
return '%s\n %s\n \n ' % (text , pad_char * len (text )) if text else ''
118
125
119
- def convert_a (self , el , text ):
126
+ def convert_a (self , el , text , convert_as_inline ):
120
127
prefix , suffix , text = chomp (text )
121
128
if not text :
122
129
return ''
130
+ if convert_as_inline :
131
+ return text
123
132
href = el .get ('href' )
124
133
title = el .get ('title' )
125
134
if self .options ['autolinks' ] and text == href and not title :
@@ -128,22 +137,32 @@ def convert_a(self, el, text):
128
137
title_part = ' "%s"' % title .replace ('"' , r'\"' ) if title else ''
129
138
return '%s[%s](%s%s)%s' % (prefix , text , href , title_part , suffix ) if href else text
130
139
131
def convert_b(self, el, text, convert_as_inline):
    """Render ``<b>`` by delegating to ``convert_strong``.

    All three arguments are forwarded unchanged.
    """
    return self.convert_strong(el, text, convert_as_inline)
142
+
143
def convert_blockquote(self, el, text, convert_as_inline):
    """Render a ``<blockquote>``.

    :param el: the blockquote element (unused).
    :param text: already-rendered child text.
    :param convert_as_inline: when true the text is returned untouched,
        since a block quote cannot be expressed inline.
    :return: the text with ``'> '`` substituted at every match of
        ``line_beginning_re`` and a leading newline, or ``''`` when there
        is no text.
    """
    if convert_as_inline:
        return text
    if not text:
        return ''
    return '\n ' + line_beginning_re.sub('> ', text)
136
149
137
def convert_br(self, el, text, convert_as_inline):
    """Render a ``<br>`` element.

    :param el: the br element (unused).
    :param text: rendered child text (unused).
    :param convert_as_inline: when true the break is dropped entirely,
        because inline output must not contain newlines.
    :return: ``''`` in inline mode, otherwise a Markdown hard line break.
    """
    if convert_as_inline:
        return ''
    # A Markdown hard break needs two trailing spaces before the newline;
    # nothing may follow the newline or the next line gains a stray indent.
    return '  \n'
139
155
140
def convert_em(self, el, text, convert_as_inline):
    """Render emphasis as ``*text*``.

    :param el: the element being converted (unused).
    :param text: already-rendered child text.
    :param convert_as_inline: accepted for signature parity with the other
        converters; emphasis is rendered the same way in either mode.
    :return: the emphasised text, or ``''`` when nothing is left after
        ``chomp``.
    """
    # chomp returns a (prefix, suffix, text) triple; presumably the prefix
    # and suffix are surrounding whitespace kept outside the markers.
    # TODO confirm against chomp's definition.
    prefix, suffix, body = chomp(text)
    if body:
        return '%s*%s*%s' % (prefix, body, suffix)
    return ''
145
161
146
- def convert_hn (self , n , el , text ):
162
+ def convert_hn (self , n , el , text , convert_as_inline ):
163
+ if convert_as_inline :
164
+ return text
165
+
147
166
style = self .options ['heading_style' ]
148
167
text = text .rstrip ()
149
168
if style == UNDERLINED and n <= 2 :
@@ -154,10 +173,14 @@ def convert_hn(self, n, el, text):
154
173
return '%s %s %s\n \n ' % (hashes , text , hashes )
155
174
return '%s %s\n \n ' % (hashes , text )
156
175
157
def convert_i(self, el, text, convert_as_inline):
    """Render ``<i>`` by delegating to ``convert_em``.

    All three arguments are forwarded unchanged.
    """
    return self.convert_em(el, text, convert_as_inline)
178
+
179
+ def convert_list (self , el , text , convert_as_inline ):
180
+
181
+ # Converting a list to inline is undefined.
182
+ # Ignoring convert_as_inline for list.
159
183
160
- def convert_list (self , el , text ):
161
184
nested = False
162
185
while el :
163
186
if el .name == 'li' :
@@ -172,7 +195,7 @@ def convert_list(self, el, text):
172
195
convert_ul = convert_list
173
196
convert_ol = convert_list
174
197
175
- def convert_li (self , el , text ):
198
+ def convert_li (self , el , text , convert_as_inline ):
176
199
parent = el .parent
177
200
if parent is not None and parent .name == 'ol' :
178
201
if parent .get ("start" ):
@@ -190,20 +213,25 @@ def convert_li(self, el, text):
190
213
bullet = bullets [depth % len (bullets )]
191
214
return '%s %s\n ' % (bullet , text or '' )
192
215
193
def convert_p(self, el, text, convert_as_inline):
    """Render a ``<p>`` element.

    :param el: the paragraph element (unused).
    :param text: already-rendered child text.
    :param convert_as_inline: when true the text is returned as-is, with
        no paragraph separator appended.
    :return: the text followed by the paragraph separator, or ``''`` when
        the paragraph is empty.
    """
    if convert_as_inline:
        return text
    if not text:
        return ''
    return '%s\n \n ' % text
195
220
196
def convert_strong(self, el, text, convert_as_inline):
    """Render strong emphasis as ``**text**``.

    :param el: the element being converted (unused).
    :param text: already-rendered child text.
    :param convert_as_inline: accepted for signature parity with the other
        converters; strong text is rendered the same way in either mode.
    :return: the bold text, or ``''`` when nothing is left after ``chomp``.
    """
    # chomp returns a (prefix, suffix, text) triple; presumably the prefix
    # and suffix are surrounding whitespace kept outside the markers.
    # TODO confirm against chomp's definition.
    prefix, suffix, body = chomp(text)
    if body:
        return '%s**%s**%s' % (prefix, body, suffix)
    return ''
201
226
202
def convert_img(self, el, text, convert_as_inline):
    """Render an ``<img>`` element.

    :param el: element whose ``attrs`` mapping supplies ``alt``, ``src``
        and ``title``; a missing or ``None`` value is treated as ``''``.
    :param text: rendered child text (unused).
    :param convert_as_inline: when true only the alt text is returned.
    :return: the alt text in inline mode, otherwise a Markdown image of
        the form ``![alt](src "title")`` with the title part omitted when
        there is no title.
    """
    alt = el.attrs.get('alt', None) or ''
    if convert_as_inline:
        # Inline mode only needs the alt text, so return before building
        # the rest of the image markup.
        return alt

    src = el.attrs.get('src', None) or ''
    title = el.attrs.get('title', None) or ''
    # Double quotes inside the title are backslash-escaped so they do not
    # terminate the quoted title early.
    title_part = ' "%s"' % title.replace('"', r'\"') if title else ''
    return '![%s](%s%s)' % (alt, src, title_part)
208
236
209
237
0 commit comments