@@ -121,9 +121,11 @@ def __init__(
121121 persist = False ,
122122 ):
123123 if persist :
124+ # Use the system temp directory in an OS-agnostic way
124125 self .zodb_db_location = (
125126 tempfile .gettempdir () + "/flattentool-" + str (uuid .uuid4 ())
126127 )
128+ # zlibstorage lowers disk usage by a lot at very small performance cost
127129 zodb_storage = zc .zlibstorage .ZlibStorage (
128130 ZODB .FileStorage .FileStorage (self .zodb_db_location )
129131 )
@@ -133,7 +135,10 @@ def __init__(
133135 self .db = ZODB .DB (None )
134136
135137 self .connection = self .db .open ()
138+
139+ # ZODB root, only objects attached here will be persisted
136140 root = self .connection .root
141+ # OOBTree is a BTree whose keys and values are both objects (including strings)
137142 root .sheet_store = BTrees .OOBTree .BTree ()
138143
139144 self .sub_sheets = {}
@@ -151,6 +156,8 @@ def __init__(
151156 self .persist = persist
152157
153158 if schema_parser :
159+ # The schema parser does not make sheets that are persistent,
160+ # so use from_sheet, which deep copies everything in it.
154161 self .main_sheet = PersistentSheet .from_sheet (
155162 schema_parser .main_sheet , self .connection
156163 )
@@ -293,9 +300,13 @@ def parse(self):
293300 # fall over on empty activity, e.g. <iati-activity/>
294301 continue
295302 self .parse_json_dict (json_dict , sheet = self .main_sheet )
303+ # Only persist every 2000 objects; persisting more often slows down storing.
304+ # 2000 top-level objects are normally not too much to store in memory.
296305 if num % 2000 == 0 and num != 0 :
297306 transaction .commit ()
298307
308+ # This commit could be removed, which would mean that up to 2000 objects
309+ # could be stored in memory without anything being persisted.
299310 transaction .commit ()
300311
301312 if self .remove_empty_schema_columns :
0 commit comments