forked from m-bone/AutoMapper
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathAtomObjectBuilder.py
More file actions
294 lines (235 loc) · 14.5 KB
/
AtomObjectBuilder.py
File metadata and controls
294 lines (235 loc) · 14.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
##############################################################################
# Developed by: Matthew Bone
# Last Updated: 30/07/2021
# Updated by: Matthew Bone
#
# Contact Details:
# Bristol Composites Institute (BCI)
# Department of Aerospace Engineering - University of Bristol
# Queen's Building - University Walk
# Bristol, BS8 1TR
# U.K.
# Email - [email protected]
#
# File Description:
# Contains key atom object creation and manipulation tools. The Atom object
# class and builder are the building blocks for map creation.
##############################################################################
import logging
from collections import Counter
from LammpsSearchFuncs import get_data, find_sections, get_neighbours, get_additional_neighbours
from LammpsTreatmentFuncs import clean_data
def build_atom_objects(fileName, elementDict, bondingAtoms, createAtoms=[]):
# Load molecule file
with open(fileName, 'r') as f:
lines = f.readlines()
# Clean data and get coords and bonds
data = clean_data(lines)
sections = find_sections(data)
types = get_data('Types', data, sections)
atomIDs = [row[0] for row in types]
bonds = get_data('Bonds', data, sections)
# Build neighbours dict
neighboursDict = get_neighbours(atomIDs, bonds)
# Remove createAtoms as neighbours - confuses the map and are not required
if createAtoms is not None:
for keyAtom, neighbours in neighboursDict.items():
updatedList = []
for atom in neighbours:
if atom not in createAtoms:
updatedList.append(atom) # Done this way as cannot update list whilst iterating through it
neighboursDict[keyAtom] = updatedList
def get_elements(neighbourIDs, elementDict):
return [elementDict[atomID]for atomID in neighbourIDs]
atomObjectDict = {}
for index, atomID in enumerate(atomIDs):
# Prevent createAtoms from enetering object dict
if createAtoms is not None:
if atomID in createAtoms:
continue
# Get atom type
atomType = types[index][1]
# Establish all neighbours
neighbours = neighboursDict[atomID]
secondNeighbours = get_additional_neighbours(neighboursDict, atomID, neighbours, bondingAtoms)
thirdNeighbours = get_additional_neighbours(neighboursDict, atomID, secondNeighbours, bondingAtoms)
neighbourElements = get_elements(neighbours, elementDict)
secondNeighbourElements = get_elements(secondNeighbours, elementDict)
thirdNeighbourElements = get_elements(thirdNeighbours, elementDict)
# Check if atom is a bonding atom, return boolean
if atomID in bondingAtoms:
bondingAtom = True
else:
bondingAtom = False
atom = Atom(atomID, atomType, elementDict[atomID], bondingAtom, neighbours, secondNeighbours, thirdNeighbours, neighbourElements, secondNeighbourElements, thirdNeighbourElements)
atomObjectDict[atomID] = atom
return atomObjectDict
def compare_symmetric_atoms(postNeighbourAtomObjectList, preNeighbourAtom, outputType, allowInference=True):
# Neighbour comparison - no inference
def compare_neighbours(neighbourLevel):
neighbourComparison = [getattr(atomObject, neighbourLevel) for atomObject in postNeighbourAtomObjectList]
neighbourFingerprint = [''.join(sorted(elements)) for elements in neighbourComparison] # sorted to get alphabetical fingerprints
# Remove duplicate fingerprints
countFingerprints = Counter(neighbourFingerprint)
tuppledFingerprints = [(index, fingerprint) for index, fingerprint in enumerate(neighbourFingerprint) if countFingerprints[fingerprint] == 1]
# If any of the fingerprints are empty (i.e. the atom has no Xneighbours) return None
for _, fingerprint in tuppledFingerprints:
if fingerprint == '':
return None
# Any of the potential post neighbours matches the pre atom fingerprint, return the post neighbour
for index, fingerprint in tuppledFingerprints:
if ''.join(sorted(getattr(preNeighbourAtom, neighbourLevel))) == fingerprint:
logging.debug(f'Pre: {preNeighbourAtom.atomID}, Post: {postNeighbourAtomObjectList[index].atomID} found with {neighbourLevel}')
if outputType == 'index':
return index
elif outputType == 'atomID':
return postNeighbourAtomObjectList[index].atomID
else:
print('Invalid output type specified for compare_symmetric_atoms')
# First neighbour comparison
symmetryResult = compare_neighbours('firstNeighbourElements')
# Second neighbour comparison
if symmetryResult is None:
symmetryResult = compare_neighbours('secondNeighbourElements')
# Third neighbour comparison
if symmetryResult is None:
symmetryResult = compare_neighbours('thirdNeighbourElements')
# If it makes it through all these, guess assignment and warn user about this
if symmetryResult is not None:
return symmetryResult
else:
if allowInference: # Only if inference is turned on
# Find all potential choices by breaking the postNeighbourAtomList down into atoms that match the preAtom element
possibleChoices = []
for index, postNeighbourAtom in enumerate(postNeighbourAtomObjectList):
if postNeighbourAtom.element == preNeighbourAtom.element:
possibleChoices.append((index, postNeighbourAtom.atomID))
# Let the user know that an inference has been made
logging.debug(f'Pre: {preNeighbourAtom.atomID}, Post: {possibleChoices[0][1]} found with symmetry inference')
print(
f'Note: Pre-bond atomID {preNeighbourAtom.atomID} has been assigned by inference to post-bond atomID {possibleChoices[0][1]}. The potential choices were {[atom[1] for atom in possibleChoices]}. Please check this is correct.'
)
if outputType == 'index':
return possibleChoices[0][0]
elif outputType == 'atomID':
return possibleChoices[0][1]
else:
print('Invalid output type specified for compare_symmetric_atoms')
class Atom():
def __init__(self, atomID, atomType, element, bondingAtom, neighbourIDs, secondNeighbourIDs, thirdNeighbourIDs, neighbourElements, secondNeighbourElements, thirdNeighbourElements):
self.atomID = atomID
self.atomType = atomType
self.element = element
self.bondingAtom = bondingAtom
# Neighbours
self.mappedNeighbourIDs = neighbourIDs # This is changed according to mapping
self.firstNeighbourIDs = neighbourIDs.copy() # This is fixed throughout mapping process
self.secondNeighbourIDs = secondNeighbourIDs
self.thirdNeighbourIDs = thirdNeighbourIDs
self.mappedNeighbourElements = neighbourElements # This is changed according to mapping
self.firstNeighbourElements = neighbourElements.copy() # This is fixed throughout mapping process
self.secondNeighbourElements = secondNeighbourElements
self.thirdNeighbourElements = thirdNeighbourElements
def check_mapped(self, mappedIDs, searchIndex, elementDict):
"""Update neighbourIDs.
Updates neighbourIDs by removing IDs that have already been mapped.
This will be called before all neighbour mapping attempts to stop atoms
being mapped multiple times.
Args:
mappedIDs: The total list of mappedIDs at this point in the mapping. This
will contain pre- and post-atomIDs
searchIndex: Determines whether to use pre- or post-atomIDs
Returns:
Updates existing class variable self.NeighbourIDs
"""
searchIndexMappedIDs = [row[searchIndex] for row in mappedIDs]
self.mappedNeighbourIDs = [ID for ID in self.mappedNeighbourIDs if ID not in searchIndexMappedIDs]
self.mappedNeighbourElements = [elementDict[atomID]for atomID in self.mappedNeighbourIDs]
def map_elements(self, atomObject, preAtomObjectDict, postAtomObjectDict):
"""Map preAtom IDs to postAtomIDs by comparing neighbouring element symbols.
Compares the occurence of string chemical element symbols of a preAtom's neighbours
to the known postAtom's neighbours. Creates a list of new mappedIDs and any missing IDs
based on the neighbours of the known pre and postAtom pair.
Relies on compare_symmetric atoms to handle non-H atoms with >1 occurence.
Args:
self: This is a preAtom that has succesfully been mapped
atomObject: The known postAtom that has already been mapped to the preAtom
preAtomObjectDict: A dictionary of all preAtoms in the molecule
postAtomObjectDict: A dictionary of all the postAtoms in the molecule
Returns:
A partial mappedIDlist, partial missing pre and postAtom lists and additional atoms for the queue
"""
# Output variables
mapList = []
missingPreAtoms = []
queueAtoms = []
def allowed_maps(preAtom, postAtom):
'''Populate missing atoms prematurely instead of making misleading
maps based on element occurence'''
# Checks if elements appear the same number of times in pre and post atoms
# If they don't, mapping is not allowed to take place and atoms are moved to missing lists
preElementOccurences = Counter(preAtom.mappedNeighbourElements)
postElementOccurences = Counter(postAtom.mappedNeighbourElements)
allowedMapDict = {}
for element, count in preElementOccurences.items():
if count == postElementOccurences[element]:
allowedMapDict[element] = True
else:
allowedMapDict[element] = False
# Force all H to be be True as hydrogen can be mapped by inference
if 'H' in allowedMapDict:
allowedMapDict['H'] = True
return allowedMapDict
allowedMapDict = allowed_maps(self, atomObject)
# Match Function
def matchNeighbour(preAtom, postAtom, preAtomIndex, postAtomIndex, mapList, queueList):
# Append pre and post atomIDs to map
mapList.append([preAtom.mappedNeighbourIDs[preAtomIndex], postAtom.mappedNeighbourIDs[postAtomIndex]])
# Add all non-hydrogen atom atomIDs to queue
if preAtom.mappedNeighbourElements[preAtomIndex] != 'H':
queueList.append([preAtom.mappedNeighbourIDs[preAtomIndex], postAtom.mappedNeighbourIDs[postAtomIndex]])
# Remove post atomID from mappedID and mappedElement atom object values
postAtom.mappedNeighbourIDs.pop(postAtomIndex)
postAtom.mappedNeighbourElements.pop(postAtomIndex)
# Loop through neighbours for preAtom and compare to neighbours of postAtom
for preIndex, neighbour in enumerate(self.mappedNeighbourElements):
elementOccurence = atomObject.mappedNeighbourElements.count(neighbour)
# Check if maps with the neighbour element are allowed, if not add current element to missing list
if allowedMapDict[neighbour] == False:
missingPreAtoms.append(self.mappedNeighbourIDs[preIndex])
continue
# If no match in post atom list it is a missingPreAtom
if elementOccurence == 0:
missingPreAtoms.append(self.mappedNeighbourIDs[preIndex])
# Assign atomIDs if there is only one matching element - could this go wrong if an element moves and an identical element takes its place?
elif elementOccurence == 1:
postIndex = atomObject.mappedNeighbourElements.index(neighbour)
logging.debug(f'Pre: {self.mappedNeighbourIDs[preIndex]}, Post: {atomObject.mappedNeighbourIDs[postIndex]} found with single element occurence')
matchNeighbour(self, atomObject, preIndex, postIndex, mapList, queueAtoms)
# More than one matching element requires additional considerations
elif elementOccurence > 1:
if neighbour == 'H': # H can be handled simply as all H are equivalent to each other in this case - ignores chirality
postHydrogenIndexList = [index for index, element in enumerate(atomObject.mappedNeighbourElements) if element == 'H']
postIndex = postHydrogenIndexList.pop()
logging.debug(f'Pre: {self.mappedNeighbourIDs[preIndex]}, Post: {atomObject.mappedNeighbourIDs[postIndex]} found with hydrogen symmetry inference')
matchNeighbour(self, atomObject, preIndex, postIndex, mapList, queueAtoms)
else:
# Get neighbour post atoms objects
postNeighbourIndices = [index for index, val in enumerate(atomObject.mappedNeighbourElements) if val == neighbour]
postNeighbourAtomIDs = [atomObject.mappedNeighbourIDs[i] for i in postNeighbourIndices]
postNeighbourAtomObjects = [postAtomObjectDict[atomID] for atomID in postNeighbourAtomIDs]
# Get possible pre atom object
preNeighbourAtomObject = preAtomObjectDict[self.mappedNeighbourIDs[preIndex]]
# Find the post atom ID for the current pre atom
postNeighbourAtomID = compare_symmetric_atoms(postNeighbourAtomObjects, preNeighbourAtomObject, 'atomID')
if postNeighbourAtomID is not None:
postIndex = atomObject.mappedNeighbourIDs.index(postNeighbourAtomID)
matchNeighbour(self, atomObject, preIndex, postIndex, mapList, queueAtoms)
else:
# If no post atom found, add pre atom missing atom list
print(f'Could not find the symmetric pair for preAtom {self.mappedNeighbourIDs[preIndex]}')
missingPreAtoms.append(self.mappedNeighbourIDs[preIndex])
# Search mapList for missingPostAtoms
mappedPostAtomList = [row[1] for row in mapList]
missingPostAtoms = [neighbour for neighbour in atomObject.mappedNeighbourIDs if neighbour not in mappedPostAtomList]
return mapList, missingPreAtoms, missingPostAtoms, queueAtoms