Skip to content

Commit e9cc019

Browse files
committed
Merge branch 'develop'
2 parents 53ba0da + aceced6 commit e9cc019

File tree

6 files changed

+102
-26
lines changed

6 files changed

+102
-26
lines changed

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
*.pyc
22
*.egg
3+
.eggs/
4+
*.egg-info/
35
.DS_Store
46
/.env
57
/dist

LICENSE

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
The MIT License (MIT)
2+
3+
Copyright 2012-2018 Matthew Tretter
4+
5+
Permission is hereby granted, free of charge, to any person obtaining a copy
6+
of this software and associated documentation files (the "Software"), to deal
7+
in the Software without restriction, including without limitation the rights
8+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9+
copies of the Software, and to permit persons to whom the Software is
10+
furnished to do so, subject to the following conditions:
11+
12+
The above copyright notice and this permission notice shall be included in all
13+
copies or substantial portions of the Software.
14+
15+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21+
SOFTWARE.

markdownify/__init__.py

Lines changed: 31 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
from bs4 import BeautifulSoup, NavigableString
22
import re
3+
import six
34

45

56
convert_heading_re = re.compile(r'convert_h(\d+)')
@@ -22,6 +23,19 @@ def escape(text):
2223
return text.replace('_', r'\_')
2324

2425

26+
def chomp(text):
27+
"""
28+
If the text in an inline tag like b, a, or em contains a leading or trailing
29+
space, strip the string and return a space as suffix or prefix, if needed.
30+
This function is used to prevent conversions like
31+
<b> foo</b> => ** foo**
32+
"""
33+
prefix = ' ' if text and text[0] == ' ' else ''
34+
suffix = ' ' if text and text[-1] == ' ' else ''
35+
text = text.strip()
36+
return (prefix, suffix, text)
37+
38+
2539
def _todict(obj):
2640
return dict((k, getattr(obj, k)) for k in dir(obj) if not k.startswith('_'))
2741

@@ -52,7 +66,7 @@ def convert(self, html):
5266
# want a full document. Therefore, we'll mark our fragment with an id,
5367
# create the document, and extract the element with the id.
5468
html = wrapped % html
55-
soup = BeautifulSoup(html)
69+
soup = BeautifulSoup(html, 'html.parser')
5670
return self.process_tag(soup.find(id=FRAGMENT_ID), children_only=True)
5771

5872
def process_tag(self, node, children_only=False):
@@ -61,7 +75,7 @@ def process_tag(self, node, children_only=False):
6175
# Convert the children first
6276
for el in node.children:
6377
if isinstance(el, NavigableString):
64-
text += self.process_text(unicode(el))
78+
text += self.process_text(six.text_type(el))
6579
else:
6680
text += self.process_tag(el)
6781

@@ -109,13 +123,16 @@ def underline(self, text, pad_char):
109123
return '%s\n%s\n\n' % (text, pad_char * len(text)) if text else ''
110124

111125
def convert_a(self, el, text):
126+
prefix, suffix, text = chomp(text)
127+
if not text:
128+
return ''
112129
href = el.get('href')
113130
title = el.get('title')
114131
if self.options['autolinks'] and text == href and not title:
115132
# Shortcut syntax
116133
return '<%s>' % href
117134
title_part = ' "%s"' % title.replace('"', r'\"') if title else ''
118-
return '[%s](%s%s)' % (text or '', href, title_part) if href else text or ''
135+
return '%s[%s](%s%s)%s' % (prefix, text, href, title_part, suffix) if href else text
119136

120137
def convert_b(self, el, text):
121138
return self.convert_strong(el, text)
@@ -127,7 +144,10 @@ def convert_br(self, el, text):
127144
return ' \n'
128145

129146
def convert_em(self, el, text):
130-
return '*%s*' % text if text else ''
147+
prefix, suffix, text = chomp(text)
148+
if not text:
149+
return ''
150+
return '%s*%s*%s' % (prefix, text, suffix)
131151

132152
def convert_hn(self, n, el, text):
133153
style = self.options['heading_style']
@@ -151,8 +171,9 @@ def convert_list(self, el, text):
151171
break
152172
el = el.parent
153173
if nested:
154-
text = '\n' + self.indent(text, 1)
155-
return text
174+
# remove trailing newline if nested
175+
return '\n' + self.indent(text, 1).rstrip()
176+
return '\n' + text + '\n'
156177

157178
convert_ul = convert_list
158179
convert_ol = convert_list
@@ -175,7 +196,10 @@ def convert_p(self, el, text):
175196
return '%s\n\n' % text if text else ''
176197

177198
def convert_strong(self, el, text):
178-
return '**%s**' % text if text else ''
199+
prefix, suffix, text = chomp(text)
200+
if not text:
201+
return ''
202+
return '%s**%s**%s' % (prefix, text, suffix)
179203

180204
def convert_img(self, el, text):
181205
alt = el.attrs.get('alt', None) or ''

markdownify/pkgmeta.py

Lines changed: 0 additions & 8 deletions
This file was deleted.

setup.py

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,11 @@
77

88
read = lambda filepath: codecs.open(filepath, 'r', 'utf-8').read()
99

10-
11-
pkgmeta = {}
12-
execfile(os.path.join(os.path.dirname(__file__), 'markdownify', 'pkgmeta.py'),
13-
pkgmeta)
10+
pkgmeta = {
11+
'__title__': 'markdownify',
12+
'__author__': 'Matthew Tretter',
13+
'__version__': '0.4.1',
14+
}
1415

1516

1617
class PyTest(TestCommand):
@@ -75,13 +76,13 @@ def run(self):
7576
'pytest',
7677
],
7778
install_requires=[
78-
'beautifulsoup4',
79+
'beautifulsoup4', 'six'
7980
],
8081
classifiers=[
8182
'Environment :: Web Environment',
8283
'Framework :: Django',
8384
'Intended Audience :: Developers',
84-
'License :: OSI Approved :: BSD License',
85+
'License :: OSI Approved :: MIT License',
8586
'Operating System :: OS Independent',
8687
'Programming Language :: Python :: 2.5',
8788
'Programming Language :: Python :: 2.6',

tests/test_conversions.py

Lines changed: 41 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
import re
33

44

5-
nested_uls = re.sub('\s+', '', """
5+
nested_uls = re.sub(r'\s+', '', """
66
<ul>
77
<li>1
88
<ul>
@@ -22,10 +22,28 @@
2222
</ul>""")
2323

2424

25+
def test_chomp():
26+
assert md(' <b></b> ') == ' '
27+
assert md(' <b> </b> ') == ' '
28+
assert md(' <b> </b> ') == ' '
29+
assert md(' <b> </b> ') == ' '
30+
assert md(' <b>s </b> ') == ' **s** '
31+
assert md(' <b> s</b> ') == ' **s** '
32+
assert md(' <b> s </b> ') == ' **s** '
33+
assert md(' <b> s </b> ') == ' **s** '
34+
35+
2536
def test_a():
2637
assert md('<a href="http://google.com">Google</a>') == '[Google](http://google.com)'
2738

2839

40+
def test_a_spaces():
41+
assert md('foo <a href="http://google.com">Google</a> bar') == 'foo [Google](http://google.com) bar'
42+
assert md('foo<a href="http://google.com"> Google</a> bar') == 'foo [Google](http://google.com) bar'
43+
assert md('foo <a href="http://google.com">Google </a>bar') == 'foo [Google](http://google.com) bar'
44+
assert md('foo <a href="http://google.com"></a> bar') == 'foo bar'
45+
46+
2947
def test_a_with_title():
3048
text = md('<a href="http://google.com" title="The &quot;Goog&quot;">Google</a>')
3149
assert text == r'[Google](http://google.com "The \"Goog\"")'
@@ -45,6 +63,13 @@ def test_b():
4563
assert md('<b>Hello</b>') == '**Hello**'
4664

4765

66+
def test_b_spaces():
67+
assert md('foo <b>Hello</b> bar') == 'foo **Hello** bar'
68+
assert md('foo<b> Hello</b> bar') == 'foo **Hello** bar'
69+
assert md('foo <b>Hello </b>bar') == 'foo **Hello** bar'
70+
assert md('foo <b></b> bar') == 'foo bar'
71+
72+
4873
def test_blockquote():
4974
assert md('<blockquote>Hello</blockquote>').strip() == '> Hello'
5075

@@ -62,6 +87,13 @@ def test_em():
6287
assert md('<em>Hello</em>') == '*Hello*'
6388

6489

90+
def test_em_spaces():
91+
assert md('foo <em>Hello</em> bar') == 'foo *Hello* bar'
92+
assert md('foo<em> Hello</em> bar') == 'foo *Hello* bar'
93+
assert md('foo <em>Hello </em>bar') == 'foo *Hello* bar'
94+
assert md('foo <em></em> bar') == 'foo bar'
95+
96+
6597
def test_h1():
6698
assert md('<h1>Hello</h1>') == 'Hello\n=====\n\n'
6799

@@ -90,7 +122,7 @@ def test_i():
90122

91123

92124
def test_ol():
93-
assert md('<ol><li>a</li><li>b</li></ol>') == '1. a\n2. b\n'
125+
assert md('<ol><li>a</li><li>b</li></ol>') == '\n1. a\n2. b\n\n'
94126

95127

96128
def test_p():
@@ -102,19 +134,23 @@ def test_strong():
102134

103135

104136
def test_ul():
105-
assert md('<ul><li>a</li><li>b</li></ul>') == '* a\n* b\n'
137+
assert md('<ul><li>a</li><li>b</li></ul>') == '\n* a\n* b\n\n'
138+
139+
140+
def test_inline_ul():
141+
assert md('<p>foo</p><ul><li>a</li><li>b</li></ul><p>bar</p>') == 'foo\n\n\n* a\n* b\n\nbar\n\n'
106142

107143

108144
def test_nested_uls():
109145
"""
110146
Nested ULs should alternate bullet characters.
111147
112148
"""
113-
assert md(nested_uls) == '* 1\n\t+ a\n\t\t- I\n\t\t- II\n\t\t- III\n\t\t\n\t+ b\n\t+ c\n\t\n* 2\n* 3\n'
149+
assert md(nested_uls) == '\n* 1\n\t+ a\n\t\t- I\n\t\t- II\n\t\t- III\n\t+ b\n\t+ c\n* 2\n* 3\n\n'
114150

115151

116152
def test_bullets():
117-
assert md(nested_uls, bullets='-') == '- 1\n\t- a\n\t\t- I\n\t\t- II\n\t\t- III\n\t\t\n\t- b\n\t- c\n\t\n- 2\n- 3\n'
153+
assert md(nested_uls, bullets='-') == '\n- 1\n\t- a\n\t\t- I\n\t\t- II\n\t\t- III\n\t- b\n\t- c\n- 2\n- 3\n\n'
118154

119155

120156
def test_img():

0 commit comments

Comments
 (0)