@@ -121,9 +121,11 @@ def __init__(
121
121
persist = False ,
122
122
):
123
123
if persist :
124
+ # Use temp directories in an OS-agnostic way
124
125
self .zodb_db_location = (
125
126
tempfile .gettempdir () + "/flattentool-" + str (uuid .uuid4 ())
126
127
)
128
+ # zlibstorage lowers disk usage by a lot at very small performance cost
127
129
zodb_storage = zc .zlibstorage .ZlibStorage (
128
130
ZODB .FileStorage .FileStorage (self .zodb_db_location )
129
131
)
@@ -133,7 +135,10 @@ def __init__(
133
135
self .db = ZODB .DB (None )
134
136
135
137
self .connection = self .db .open ()
138
+
139
+ # ZODB root, only objects attached here will be persisted
136
140
root = self .connection .root
141
+ # OOBTree means a BTree whose keys and values are both objects (including strings)
137
142
root .sheet_store = BTrees .OOBTree .BTree ()
138
143
139
144
self .sub_sheets = {}
@@ -151,6 +156,8 @@ def __init__(
151
156
self .persist = persist
152
157
153
158
if schema_parser :
159
+ # schema parser does not make sheets that are persistent,
160
+ # so use from_sheet, which deep copies everything in it.
154
161
self .main_sheet = PersistentSheet .from_sheet (
155
162
schema_parser .main_sheet , self .connection
156
163
)
@@ -293,9 +300,13 @@ def parse(self):
293
300
# fall over on empty activity, e.g. <iati-activity/>
294
301
continue
295
302
self .parse_json_dict (json_dict , sheet = self .main_sheet )
303
+ # only persist every 2000 objects. persisting more often slows down storing.
304
+ # 2000 top-level objects are normally not too much to store in memory.
296
305
if num % 2000 == 0 and num != 0 :
297
306
transaction .commit ()
298
307
308
+ # This commit could be removed, which would mean that up to 2000 objects
309
+ # could be stored in memory without anything being persisted.
299
310
transaction .commit ()
300
311
301
312
if self .remove_empty_schema_columns :
0 commit comments