@@ -306,6 +306,77 @@ def loadDatabase(self):
306306 with open (cache_hash_file ,'w' ) as f :
307307 f .write (database_hash )
308308
def loadThermoDatabase(self):
    """
    Load only the thermodynamics part of the RMG database.

    The data is loaded from the :file:`thermo` subdirectory of
    self.databaseDirectory, according to the setting in:

        * self.thermoLibraries

    The thermo depository is not loaded.

    If self.outputDirectory contains :file:`database.pkl` and :file:`database.hash` files then
    these are checked for validity and used as a cache: the stored hash must match a hash
    computed from the current settings and sources, and the unpickled database must carry
    that same hash. An invalid cache is deleted. Once the database has been loaded from
    source, a new cache (pickle and hash) is saved.

    On return, self.database holds the loaded database.
    """
    import inspect
    import hashlib
    import cPickle
    import rmgpy.utilities
    import scoop.shared

    # Make a hash of everything that could alter the contents of the database once it is fully loaded.
    # Then we can compare this hash to the cached file to see if the cache is valid.
    database_metadata = {
        'path': self.databaseDirectory,
        'database hash': rmgpy.utilities.path_checksum([self.databaseDirectory]),
        'thermoLibraries': self.thermoLibraries,
        'rmgpy.data source hash': rmgpy.data.getSourceHash(),
        'this source hash': hashlib.sha1(inspect.getsource(self.__class__)).hexdigest(),
    }
    database_hash = hashlib.sha1(cPickle.dumps(database_metadata)).hexdigest()

    # NOTE(review): these cache file names look like the ones the full-database
    # loader uses as well; if so, the two loaders will keep invalidating each
    # other's cache. Confirm whether a thermo-specific file name is wanted.
    cache_hash_file = os.path.join(self.outputDirectory, 'database.hash')
    cache_pickle_file = os.path.join(self.outputDirectory, 'database.pkl')

    # Publish the cache location and expected hash as scoop shared constants.
    scoop.shared.setConst(databaseFile=cache_pickle_file, databaseHash=database_hash)

    if not os.path.exists(cache_pickle_file):
        logging.info("Couldn't find a database cache file {0!r} so will reload from source.".format(cache_pickle_file))
    elif not os.path.exists(cache_hash_file):
        logging.info("Couldn't find database cache hash file {0!r} to validate cache so will reload from source.".format(cache_hash_file))
    else:
        # Read the stored hash with a context manager so the handle is closed
        # before the file is possibly unlinked below.
        with open(cache_hash_file, 'r') as f:
            stored_hash = f.read()
        if database_hash != stored_hash:
            logging.info("According to hash file, it looks like database cache is not valid. Will clear it and reload.")
            os.unlink(cache_hash_file)
            os.unlink(cache_pickle_file)
        else:
            logging.info("According to hash file, it looks like database cache is valid.")
            # NOTE(review): unpickling runs arbitrary code from the file; this is
            # only safe while the output directory is trusted.
            with open(cache_pickle_file, 'rb') as f:
                database = cPickle.load(f)
            # Check the database from the pickle really does have the hash in the database.hash file.
            # A pickle without a `hash` attribute is treated as an invalid cache.
            if getattr(database, 'hash', None) == database_hash:
                logging.info("Database loaded from {0} has correct hash. Will use this cache.".format(cache_pickle_file))
                self.database = database
                rmgpy.data.rmg.database = database  # we need to store it in this module level variable too!
                return
            else:
                logging.info("Database loaded from {0} has INCORRECT hash. Will clear the cache and reload.".format(cache_pickle_file))
                os.unlink(cache_hash_file)
                os.unlink(cache_pickle_file)

    # No usable cache: load the thermo data from source.
    self.database = RMGDatabase()
    self.database.loadThermo(
        path=os.path.join(self.databaseDirectory, 'thermo'),
        thermoLibraries=self.thermoLibraries,
        depository=False,  # Don't bother loading the depository information, as we don't use it
    )

    self.database.hash = database_hash  # store the hash in the database so we can check it when it is next pickled.
    logging.info("Saving database cache in {0!r}".format(cache_pickle_file))
    self.database.saveToPickle(cache_pickle_file)
    with open(cache_hash_file, 'w') as f:
        f.write(database_hash)
378+
379+
309380
310381 def initialize (self , args ):
311382 """
0 commit comments