AutoMapper/AtomObjectBuilder.py at main · jrgissing/AutoMapper · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
##############################################################################
# Developed by: Matthew Bone
# Last Updated: 30/07/2021
# Updated by: Matthew Bone
#
# Contact Details:
# Bristol Composites Institute (BCI)
# Department of Aerospace Engineering - University of Bristol
# Queen's Building - University Walk
# Bristol, BS8 1TR
# U.K.
# Email - [email protected]
#
# File Description:
# Contains key atom object creation and manipulation tools. The Atom object
# class and builder are the building blocks for map creation.
##############################################################################

import logging
from collections import Counter

from LammpsSearchFuncs import get_data, find_sections, get_neighbours, get_additional_neighbours
from LammpsTreatmentFuncs import clean_data

def build_atom_objects(fileName, elementDict, bondingAtoms, createAtoms=[]):
    # Load molecule file
    with open(fileName, 'r') as f:
        lines = f.readlines()

    # Clean data and get coords and bonds
    data = clean_data(lines)
    sections = find_sections(data)
    types = get_data('Types', data, sections)

    atomIDs = [row[0] for row in types]
    bonds = get_data('Bonds', data, sections)

    # Build neighbours dict
    neighboursDict = get_neighbours(atomIDs, bonds)

    # Remove createAtoms as neighbours - confuses the map and are not required
    if createAtoms is not None:
        for keyAtom, neighbours in neighboursDict.items():
            updatedList = []
            for atom in neighbours:
                if atom not in createAtoms:
                    updatedList.append(atom) # Done this way as cannot update list whilst iterating through it

            neighboursDict[keyAtom] = updatedList

    def get_elements(neighbourIDs, elementDict):
        return [elementDict[atomID]for atomID in neighbourIDs]

    atomObjectDict = {}
    for index, atomID in enumerate(atomIDs):
        # Prevent createAtoms from enetering object dict
        if createAtoms is not None:
            if atomID in createAtoms:
                continue

        # Get atom type
        atomType = types[index][1]

        # Establish all neighbours
        neighbours = neighboursDict[atomID]
        secondNeighbours = get_additional_neighbours(neighboursDict, atomID, neighbours, bondingAtoms)
        thirdNeighbours = get_additional_neighbours(neighboursDict, atomID, secondNeighbours, bondingAtoms)

        neighbourElements = get_elements(neighbours, elementDict)
        secondNeighbourElements = get_elements(secondNeighbours, elementDict)
        thirdNeighbourElements = get_elements(thirdNeighbours, elementDict)

        # Check if atom is a bonding atom, return boolean
        if atomID in bondingAtoms:
            bondingAtom = True
        else:
            bondingAtom = False

        atom = Atom(atomID, atomType, elementDict[atomID], bondingAtom, neighbours, secondNeighbours, thirdNeighbours, neighbourElements, secondNeighbourElements, thirdNeighbourElements)
        atomObjectDict[atomID] = atom

    return atomObjectDict

def compare_symmetric_atoms(postNeighbourAtomObjectList, preNeighbourAtom, outputType, allowInference=True):
    # Neighbour comparison - no inference
    def compare_neighbours(neighbourLevel):
        neighbourComparison = [getattr(atomObject, neighbourLevel) for atomObject in postNeighbourAtomObjectList]
        neighbourFingerprint = [''.join(sorted(elements)) for elements in neighbourComparison] # sorted to get alphabetical fingerprints

        # Remove duplicate fingerprints
        countFingerprints = Counter(neighbourFingerprint)
        tuppledFingerprints = [(index, fingerprint) for index, fingerprint in enumerate(neighbourFingerprint) if countFingerprints[fingerprint] == 1]

        # If any of the fingerprints are empty (i.e. the atom has no Xneighbours) return None
        for _, fingerprint in tuppledFingerprints:
            if fingerprint == '':
                return None

        # Any of the potential post neighbours matches the pre atom fingerprint, return the post neighbour
        for index, fingerprint in tuppledFingerprints:
            if ''.join(sorted(getattr(preNeighbourAtom, neighbourLevel))) == fingerprint:
                logging.debug(f'Pre: {preNeighbourAtom.atomID}, Post: {postNeighbourAtomObjectList[index].atomID} found with {neighbourLevel}')
                if outputType == 'index':
                    return index
                elif outputType == 'atomID':
                    return postNeighbourAtomObjectList[index].atomID
                else:
                    print('Invalid output type specified for compare_symmetric_atoms')

    # First neighbour comparison
    symmetryResult = compare_neighbours('firstNeighbourElements')

    # Second neighbour comparison
    if symmetryResult is None:
        symmetryResult = compare_neighbours('secondNeighbourElements')

    # Third neighbour comparison
    if symmetryResult is None:
        symmetryResult = compare_neighbours('thirdNeighbourElements')

    # If it makes it through all these, guess assignment and warn user about this
    if symmetryResult is not None:
        return symmetryResult
    else:
        if allowInference: # Only if inference is turned on
            # Find all potential choices by breaking the postNeighbourAtomList down into atoms that match the preAtom element
            possibleChoices = []
            for index, postNeighbourAtom in enumerate(postNeighbourAtomObjectList):
                if postNeighbourAtom.element == preNeighbourAtom.element:
                    possibleChoices.append((index, postNeighbourAtom.atomID))

            # Let the user know that an inference has been made
            logging.debug(f'Pre: {preNeighbourAtom.atomID}, Post: {possibleChoices[0][1]} found with symmetry inference')
            print(
                f'Note: Pre-bond atomID {preNeighbourAtom.atomID} has been assigned by inference to post-bond atomID {possibleChoices[0][1]}. The potential choices were {[atom[1] for atom in possibleChoices]}. Please check this is correct.'
            )
            if outputType == 'index':
                    return possibleChoices[0][0]
            elif outputType == 'atomID':
                return possibleChoices[0][1]
            else:
                print('Invalid output type specified for compare_symmetric_atoms')


class Atom():
    def __init__(self, atomID, atomType, element, bondingAtom, neighbourIDs, secondNeighbourIDs, thirdNeighbourIDs, neighbourElements, secondNeighbourElements, thirdNeighbourElements):
        self.atomID = atomID
        self.atomType = atomType
        self.element = element
        self.bondingAtom = bondingAtom

        # Neighbours
        self.mappedNeighbourIDs = neighbourIDs # This is changed according to mapping
        self.firstNeighbourIDs = neighbourIDs.copy() # This is fixed throughout mapping process
        self.secondNeighbourIDs = secondNeighbourIDs
        self.thirdNeighbourIDs = thirdNeighbourIDs

        self.mappedNeighbourElements = neighbourElements # This is changed according to mapping
        self.firstNeighbourElements = neighbourElements.copy() # This is fixed throughout mapping process
        self.secondNeighbourElements = secondNeighbourElements
        self.thirdNeighbourElements = thirdNeighbourElements

    def check_mapped(self, mappedIDs, searchIndex, elementDict):
        """Update neighbourIDs.

        Updates neighbourIDs by removing IDs that have already been mapped.
        This will be called before all neighbour mapping attempts to stop atoms
        being mapped multiple times.

        Args:
            mappedIDs: The total list of mappedIDs at this point in the mapping. This
                will contain pre- and post-atomIDs
            searchIndex: Determines whether to use pre- or post-atomIDs

        Returns:
            Updates existing class variable self.NeighbourIDs
        """
        searchIndexMappedIDs = [row[searchIndex] for row in mappedIDs]

        self.mappedNeighbourIDs = [ID for ID in self.mappedNeighbourIDs if ID not in searchIndexMappedIDs]
        self.mappedNeighbourElements = [elementDict[atomID]for atomID in self.mappedNeighbourIDs]


    def map_elements(self, atomObject, preAtomObjectDict, postAtomObjectDict):
        """Map preAtom IDs to postAtomIDs by comparing neighbouring element symbols.

        Compares the occurence of string chemical element symbols of a preAtom's neighbours
        to the known postAtom's neighbours. Creates a list of new mappedIDs and any missing IDs
        based on the neighbours of the known pre and postAtom pair.
        Relies on compare_symmetric atoms to handle non-H atoms with >1 occurence.

        Args:
            self: This is a preAtom that has succesfully been mapped
            atomObject: The known postAtom that has already been mapped to the preAtom
            preAtomObjectDict: A dictionary of all preAtoms in the molecule
            postAtomObjectDict: A dictionary of all the postAtoms in the molecule

        Returns:
            A partial mappedIDlist, partial missing pre and postAtom lists and additional atoms for the queue
        """

        # Output variables
        mapList = []
        missingPreAtoms = []
        queueAtoms = []

        def allowed_maps(preAtom, postAtom):
            '''Populate missing atoms prematurely instead of making misleading
            maps based on element occurence'''
            # Checks if elements appear the same number of times in pre and post atoms
            # If they don't, mapping is not allowed to take place and atoms are moved to missing lists
            preElementOccurences = Counter(preAtom.mappedNeighbourElements)
            postElementOccurences = Counter(postAtom.mappedNeighbourElements)

            allowedMapDict = {}
            for element, count in preElementOccurences.items():
                if count == postElementOccurences[element]:
                    allowedMapDict[element] = True
                else:
                    allowedMapDict[element] = False

            # Force all H to be be True as hydrogen can be mapped by inference
            if 'H' in allowedMapDict:
                allowedMapDict['H'] = True

            return allowedMapDict

        allowedMapDict = allowed_maps(self, atomObject)

        # Match Function
        def matchNeighbour(preAtom, postAtom, preAtomIndex, postAtomIndex, mapList, queueList):
            # Append pre and post atomIDs to map
            mapList.append([preAtom.mappedNeighbourIDs[preAtomIndex], postAtom.mappedNeighbourIDs[postAtomIndex]])

            # Add all non-hydrogen atom atomIDs to queue
            if preAtom.mappedNeighbourElements[preAtomIndex] != 'H':
                queueList.append([preAtom.mappedNeighbourIDs[preAtomIndex], postAtom.mappedNeighbourIDs[postAtomIndex]])

            # Remove post atomID from mappedID and mappedElement atom object values
            postAtom.mappedNeighbourIDs.pop(postAtomIndex)
            postAtom.mappedNeighbourElements.pop(postAtomIndex)

        # Loop through neighbours for preAtom and compare to neighbours of postAtom
        for preIndex, neighbour in enumerate(self.mappedNeighbourElements):
            elementOccurence = atomObject.mappedNeighbourElements.count(neighbour)

            # Check if maps with the neighbour element are allowed, if not add current element to missing list
            if allowedMapDict[neighbour] == False:
                missingPreAtoms.append(self.mappedNeighbourIDs[preIndex])
                continue

            # If no match in post atom list it is a missingPreAtom
            if elementOccurence == 0:
                missingPreAtoms.append(self.mappedNeighbourIDs[preIndex])

            # Assign atomIDs if there is only one matching element - could this go wrong if an element moves and an identical element takes its place?
            elif elementOccurence == 1:
                postIndex = atomObject.mappedNeighbourElements.index(neighbour)
                logging.debug(f'Pre: {self.mappedNeighbourIDs[preIndex]}, Post: {atomObject.mappedNeighbourIDs[postIndex]} found with single element occurence')
                matchNeighbour(self, atomObject, preIndex, postIndex, mapList, queueAtoms)

            # More than one matching element requires additional considerations
            elif elementOccurence > 1:
                if neighbour == 'H': # H can be handled simply as all H are equivalent to each other in this case - ignores chirality
                    postHydrogenIndexList = [index for index, element in enumerate(atomObject.mappedNeighbourElements) if element == 'H']
                    postIndex = postHydrogenIndexList.pop()
                    logging.debug(f'Pre: {self.mappedNeighbourIDs[preIndex]}, Post: {atomObject.mappedNeighbourIDs[postIndex]} found with hydrogen symmetry inference')
                    matchNeighbour(self, atomObject, preIndex, postIndex, mapList, queueAtoms)

                else:
                    # Get neighbour post atoms objects
                    postNeighbourIndices = [index for index, val in enumerate(atomObject.mappedNeighbourElements) if val == neighbour]
                    postNeighbourAtomIDs = [atomObject.mappedNeighbourIDs[i] for i in postNeighbourIndices]
                    postNeighbourAtomObjects = [postAtomObjectDict[atomID] for atomID in postNeighbourAtomIDs]

                    # Get possible pre atom object
                    preNeighbourAtomObject = preAtomObjectDict[self.mappedNeighbourIDs[preIndex]]

                    # Find the post atom ID for the current pre atom
                    postNeighbourAtomID = compare_symmetric_atoms(postNeighbourAtomObjects, preNeighbourAtomObject, 'atomID')
                    if postNeighbourAtomID is not None:
                        postIndex = atomObject.mappedNeighbourIDs.index(postNeighbourAtomID)
                        matchNeighbour(self, atomObject, preIndex, postIndex, mapList, queueAtoms)
                    else:
                        # If no post atom found, add pre atom missing atom list
                        print(f'Could not find the symmetric pair for preAtom {self.mappedNeighbourIDs[preIndex]}')
                        missingPreAtoms.append(self.mappedNeighbourIDs[preIndex])


        # Search mapList for missingPostAtoms
        mappedPostAtomList = [row[1] for row in mapList]
        missingPostAtoms = [neighbour for neighbour in atomObject.mappedNeighbourIDs if neighbour not in mappedPostAtomList]

        return mapList, missingPreAtoms, missingPostAtoms, queueAtoms