"""Print the recipe ingredients that are not already in the pantry.

A small demonstration of walking an HTML/XML table with the standard
library's ``xml.etree.ElementTree``.

Provenance: contents of the new file ``XML/HTML parsing`` (21 lines) added by
git patch 67a65e3e55d23fd9cd1e39e49eec9bce0b7369b0, subject
"lines: XML/HTML parsing", authored by
Ayan123430 <72403860+Ayan123430@users.noreply.github.com> on
Mon, 5 Oct 2020 22:13:42 +0530.
"""

# ElementTree overview (original attribution):
# http://effbot.org/zone/element-index.htm
import xml.etree.ElementTree as etree

# NOTE(review): in the copy of the patch under review the element tags of this
# table were missing (only the cell text survived). The markup below is
# reconstructed from what the code requires -- <tr> rows with exactly three
# cells, <td> for data rows and a header row that fails the "td" test.
# Confirm against the upstream file.
dinner_recipe = '''<html><body><table>
<tr><th>amt</th><th>unit</th><th>item</th></tr>
<tr><td>24</td><td>slices</td><td>baguette</td></tr>
<tr><td>2+</td><td>tbsp</td><td>olive oil</td></tr>
<tr><td>1</td><td>cup</td><td>tomatoes</td></tr>
<tr><td>1</td><td>jar</td><td>pesto</td></tr>
</table></body></html>'''


def missing_ingredients(table, pantry):
    """Yield ``(item, amt, unit)`` text triples for rows not covered by *pantry*.

    Args:
        table: An ``Element`` whose ``<tr>`` descendants each contain exactly
            three child cells, in the order amount, unit, item.
        pantry: A container of item names that are already on hand.

    Yields:
        Tuples ``(item_text, amt_text, unit_text)`` for every row whose third
        cell is a ``<td>`` and whose text is not in *pantry*.

    Raises:
        ValueError: If a ``<tr>`` does not have exactly three child elements.
    """
    # Element.iter() replaces getiterator(), which was removed in Python 3.9.
    for row in table.iter('tr'):
        amt, unit, item = row
        # Header rows use <th> cells, so the tag test filters them out.
        if item.tag == "td" and item.text not in pantry:
            yield item.text, amt.text, unit.text


tree = etree.fromstring(dinner_recipe)

# fromstring() only accepts well-formed markup. For invalid HTML the original
# author pointed at ElementSoup (http://effbot.org/zone/element-soup.htm):
# import ElementSoup, StringIO
# tree = ElementSoup.parse(StringIO.StringIO(dinner_recipe))

pantry = {'olive oil', 'pesto'}
for item, amt, unit in missing_ingredients(tree, pantry):
    print("%s: %s %s" % (item, amt, unit))