Skip to content

Commit

Permalink
lines: XML/HTML parsing
Browse files Browse the repository at this point in the history
  • Loading branch information
Ayan123430 authored Oct 5, 2020
1 parent ebfd55d commit 67a65e3
Showing 1 changed file with 21 additions and 0 deletions.
21 changes: 21 additions & 0 deletions XML/HTML parsing
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Sample recipe as a small, well-formed HTML table: one header row (<th>)
# followed by one row per ingredient with amount, unit and item cells (<td>).
dinner_recipe = '''<html><body><table>
<tr><th>amt</th><th>unit</th><th>item</th></tr>
<tr><td>24</td><td>slices</td><td>baguette</td></tr>
<tr><td>2+</td><td>tbsp</td><td>olive oil</td></tr>
<tr><td>1</td><td>cup</td><td>tomatoes</td></tr>
<tr><td>1</td><td>jar</td><td>pesto</td></tr>
</table></body></html>'''

# From http://effbot.org/zone/element-index.htm
import xml.etree.ElementTree as etree
tree = etree.fromstring(dinner_recipe)

# For invalid HTML use http://effbot.org/zone/element-soup.htm
# import ElementSoup, StringIO
# tree = ElementSoup.parse(StringIO.StringIO(dinner_recipe))

# Items already on hand; rows whose item is listed here are not reported.
pantry = {'olive oil', 'pesto'}

# Formatted "item: amt unit" lines for every ingredient not in the pantry.
shopping_list = []
# Element.iter() replaces Element.getiterator(), which was removed in
# Python 3.9.
for ingredient in tree.iter('tr'):
    # Each row is expected to hold exactly three cells: amount, unit, item.
    amt, unit, item = ingredient
    # The header row uses <th> cells, so the tag check skips it.
    if item.tag == "td" and item.text not in pantry:
        shopping_list.append("%s: %s %s" % (item.text, amt.text, unit.text))

for line in shopping_list:
    print(line)

0 comments on commit 67a65e3

Please sign in to comment.