6
6
Biological Structures at Stanford, funded under the NIH Roadmap for
7
7
Medical Research, grant U54 GM072970. See https://simtk.org.
8
8
9
- Portions copyright (c) 2013-2023 Stanford University and the Authors.
9
+ Portions copyright (c) 2013-2024 Stanford University and the Authors.
10
10
Authors: Peter Eastman
11
11
Contributors:
12
12
54
54
import os
55
55
import os .path
56
56
import math
57
+ from collections import defaultdict
57
58
58
59
from pkg_resources import resource_filename
59
60
@@ -98,6 +99,15 @@ def __init__(self, chainId, number, residueName, standardName):
98
99
self .residueName = residueName
99
100
self .standardName = standardName
100
101
102
class Template:
    """Template represents a standard residue, or a nonstandard one registered with registerTemplate().

    It is a plain record holding three attributes: the residue's topology,
    the positions of its atoms, and a per-atom list of "terminal" flags.
    """

    def __init__(self, topology, positions, terminal=None):
        self.topology = topology
        self.positions = positions
        # With no flags supplied, every atom gets a False flag (one entry per
        # atom reported by the topology).  A supplied list is stored as-is.
        self.terminal = [False] * topology.getNumAtoms() if terminal is None else terminal
110
+
101
111
def _guessFileFormat (file , filename ):
102
112
"""Guess whether a file is PDB or PDBx/mmCIF based on its filename and contents."""
103
113
filename = filename .lower ()
@@ -276,7 +286,7 @@ def __init__(self, filename=None, pdbfile=None, pdbxfile=None, url=None, pdbid=N
276
286
for file in os .listdir (templatesPath ):
277
287
templatePdb = app .PDBFile (os .path .join (templatesPath , file ))
278
288
name = next (templatePdb .topology .residues ()).name
279
- self .templates [name ] = templatePdb
289
+ self .templates [name ] = Template ( templatePdb . topology , templatePdb . positions )
280
290
281
291
def _initializeFromPDB (self , file ):
282
292
"""Initialize this object by reading a PDB file."""
@@ -344,6 +354,96 @@ def _initializeFromPDBx(self, file):
344
354
for row in modData .getRowList ():
345
355
self .modifiedResidues .append (ModifiedResidue (row [asymIdCol ], int (row [resNumCol ]), row [resNameCol ], row [standardResCol ]))
346
356
357
+ def _getTemplate (self , name ):
358
+ """Return the template with a name. If none has been registered, this will return None."""
359
+ if name in self .templates :
360
+ return self .templates [name ]
361
+ return None
362
+
363
def registerTemplate(self, topology, positions, terminal=None):
    """Register a template for a nonstandard residue.

    Once registered, PDBFixer can add missing residues of this type, add
    missing atoms to existing residues of this type, and mutate other
    residues to it.  The template is stored under the name of the single
    residue in the Topology, replacing any template already stored there.

    Parameters
    ----------
    topology: openmm.app.Topology
        A Topology containing a single chain with a single residue, describing
        the nonstandard residue being registered.
    positions: array of shape (n_atoms, 3)
        The positions of the atoms in the residue in a typical conformation.
        These positions are used when adding missing atoms or residues.
    terminal: optional list of bool
        If this is present, it should be a list of length equal to the number
        of atoms in the residue.  If an element is True, that indicates the
        corresponding atom should only be added to terminal residues.

    Raises
    ------
    ValueError
        If the Topology does not contain exactly one residue, or if the length
        of positions or terminal differs from the number of atoms.
    """
    templateResidues = list(topology.residues())
    if len(templateResidues) != 1:
        raise ValueError('The Topology must contain a single residue')

    numAtoms = topology.getNumAtoms()
    if numAtoms != len(positions):
        raise ValueError('The number of positions does not match the number of atoms in the Topology')

    # The flags are optional; their length is only checked when they are given.
    flagsMismatch = terminal is not None and len(terminal) != numAtoms
    if flagsMismatch:
        raise ValueError('The number of terminal flags does not match the number of atoms in the Topology')

    onlyResidue = templateResidues[0]
    self.templates[onlyResidue.name] = Template(topology, positions, terminal)
387
+
388
def downloadTemplate(self, name):
    """Attempt to download a residue definition from the PDB and register a template for it.

    Parameters
    ----------
    name: str
        The name of the residue, as specified in the PDB Chemical Component Dictionary.
        It is converted to upper case before being looked up.

    Returns
    -------
    True if a template was successfully registered, False otherwise (the
    download failed, or the downloaded definition could not be turned into
    a template).
    """
    name = name.upper()
    try:
        # The context manager closes the connection even if reading or
        # decoding the response fails.
        with urlopen(f'https://files.rcsb.org/ligands/download/{name}.cif') as file:
            contents = file.read().decode('utf-8')
    except Exception:
        # Best effort: any failure to fetch the definition simply means no
        # template.  Unlike a bare except, this still lets KeyboardInterrupt
        # and SystemExit propagate.
        return False

    # Load the atoms.

    from openmm.app.internal.pdbx.reader.PdbxReader import PdbxReader
    reader = PdbxReader(StringIO(contents))
    data = []
    reader.read(data)
    if not data:
        # Nothing was parsed from the response, so there is no definition to use.
        return False
    block = data[0]
    atomData = block.getObj('chem_comp_atom')
    if atomData is None:
        # Without an atom table there is nothing to build a template from.
        return False
    atomNameCol = atomData.getAttributeIndex('atom_id')
    symbolCol = atomData.getAttributeIndex('type_symbol')
    leavingCol = atomData.getAttributeIndex('pdbx_leaving_atom_flag')
    xCol = atomData.getAttributeIndex('pdbx_model_Cartn_x_ideal')
    yCol = atomData.getAttributeIndex('pdbx_model_Cartn_y_ideal')
    zCol = atomData.getAttributeIndex('pdbx_model_Cartn_z_ideal')
    topology = app.Topology()
    chain = topology.addChain()
    residue = topology.addResidue(name, chain)
    positions = []
    atomByName = {}
    terminal = []
    for row in atomData.getRowList():
        atomName = row[atomNameCol]
        try:
            # The values are scaled by 0.1 and tagged as nanometers below
            # (presumably the file stores Angstroms -- TODO confirm).
            position = mm.Vec3(float(row[xCol]), float(row[yCol]), float(row[zCol]))*0.1
        except ValueError:
            # A coordinate that is not a number (e.g. a missing-value
            # placeholder) means no usable conformation is available.
            return False
        atom = topology.addAtom(atomName, app.Element.getBySymbol(row[symbolCol]), residue)
        atomByName[atomName] = atom
        # Atoms flagged as leaving atoms are recorded as terminal-only.
        terminal.append(row[leavingCol] == 'Y')
        positions.append(position)
    positions = positions*unit.nanometers

    # Load the bonds.

    bondData = block.getObj('chem_comp_bond')
    if bondData is not None:
        atom1Col = bondData.getAttributeIndex('atom_id_1')
        atom2Col = bondData.getAttributeIndex('atom_id_2')
        for row in bondData.getRowList():
            topology.addBond(atomByName[row[atom1Col]], atomByName[row[atom2Col]])
    self.registerTemplate(topology, positions, terminal)
    return True
446
+
347
447
def _addAtomsToTopology (self , heavyAtomsOnly , omitUnknownMolecules ):
348
448
"""Create a new Topology in which missing atoms have been added.
349
449
@@ -375,7 +475,7 @@ def _addAtomsToTopology(self, heavyAtomsOnly, omitUnknownMolecules):
375
475
addedOXT = []
376
476
residueCenters = [self ._computeResidueCenter (res ).value_in_unit (unit .nanometers ) for res in self .topology .residues ()]* unit .nanometers
377
477
for chain in self .topology .chains ():
378
- if omitUnknownMolecules and not any ( residue .name in self . templates for residue in chain .residues ()):
478
+ if omitUnknownMolecules and all ( self . _getTemplate ( residue .name ) is None for residue in chain .residues ()):
379
479
continue
380
480
chainResidues = list (chain .residues ())
381
481
newChain = newTopology .addChain (chain .id )
@@ -413,7 +513,7 @@ def _addAtomsToTopology(self, heavyAtomsOnly, omitUnknownMolecules):
413
513
414
514
# Find corresponding atoms in the residue and the template.
415
515
416
- template = self .templates [ residue .name ]
516
+ template = self ._getTemplate ( residue .name )
417
517
atomPositions = dict ((atom .name , self .positions [atom .index ]) for atom in residue .atoms ())
418
518
points1 = []
419
519
points2 = []
@@ -508,7 +608,7 @@ def _addMissingResiduesToChain(self, chain, residueNames, startPosition, endPosi
508
608
# Add the residues.
509
609
510
610
for i , residueName in enumerate (residueNames ):
511
- template = self .templates [ residueName ]
611
+ template = self ._getTemplate ( residueName )
512
612
513
613
# Find a translation that best matches the adjacent residue.
514
614
@@ -703,7 +803,7 @@ def findNonstandardResidues(self):
703
803
replacement = modres [key ]
704
804
if replacement == 'DU' :
705
805
replacement = 'DT'
706
- if replacement in self . templates :
806
+ if self . _getTemplate ( replacement ) != None :
707
807
nonstandard [residue ] = replacement
708
808
self .nonstandardResidues = [(r , nonstandard [r ]) for r in sorted (nonstandard , key = lambda r : r .index )]
709
809
@@ -731,7 +831,7 @@ def replaceNonstandardResidues(self):
731
831
732
832
for residue , replaceWith in self .nonstandardResidues :
733
833
residue .name = replaceWith
734
- template = self .templates [ replaceWith ]
834
+ template = self ._getTemplate ( replaceWith )
735
835
standardAtoms = set (atom .name for atom in template .topology .atoms ())
736
836
for atom in residue .atoms ():
737
837
if atom .element in (None , hydrogen ) or atom .name not in standardAtoms :
@@ -760,6 +860,10 @@ def applyMutations(self, mutations, chain_id):
760
860
Notes
761
861
-----
762
862
863
+ If a target residue is not a standard amino acid, and if no template
864
+ has been registered for it with registerTemplate(), this function
865
+ attempts to look it up from the PDB and create a new template for it.
866
+
763
867
We require three letter codes to avoid possible ambiguities.
764
868
We can't guarantee that the resulting model is a good one; for
765
869
significant changes in sequence, you should probably be using
@@ -800,10 +904,11 @@ def applyMutations(self, mutations, chain_id):
800
904
if residue .name != old_name :
801
905
raise (ValueError ("You asked to mutate chain %s residue %d name %s, but that residue is actually %s!" % (chain_id , resSeq , old_name , residue .name )))
802
906
803
- try :
804
- template = self .templates [new_name ]
805
- except KeyError :
806
- raise (KeyError ("Cannot find residue %s in template library!" % new_name ))
907
+ if self ._getTemplate (new_name ) is None :
908
+ # Try to download a template from the PDB.
909
+ self .downloadTemplate (new_name )
910
+ if self ._getTemplate (new_name ) is None :
911
+ raise (KeyError ("Cannot find residue %s in template library!" % new_name ))
807
912
808
913
# Store mutation
809
914
residue_map [residue ] = new_name
@@ -816,7 +921,7 @@ def applyMutations(self, mutations, chain_id):
816
921
for residue in residue_map .keys ():
817
922
replaceWith = residue_map [residue ]
818
923
residue .name = replaceWith
819
- template = self .templates [ replaceWith ]
924
+ template = self ._getTemplate ( replaceWith )
820
925
standardAtoms = set (atom .name for atom in template .topology .atoms ())
821
926
for atom in residue .atoms ():
822
927
if atom .element in (None , hydrogen ) or atom .name not in standardAtoms :
@@ -858,23 +963,60 @@ def findMissingAtoms(self):
858
963
missingAtoms = {}
859
964
missingTerminals = {}
860
965
966
+ # Determine which atoms have an external bond to another residue.
967
+
968
+ hasExternal = defaultdict (bool )
969
+ for atom1 , atom2 in self .topology .bonds ():
970
+ if atom1 .residue != atom2 .residue :
971
+ hasExternal [(atom1 .residue , atom1 .name )] = True
972
+ hasExternal [(atom2 .residue , atom2 .name )] = True
973
+ for chain in self .topology .chains ():
974
+ chainResidues = list (chain .residues ())
975
+ for residue in chain .residues ():
976
+ atomNames = [atom .name for atom in residue .atoms ()]
977
+ if all (name in atomNames for name in ['C' , 'O' , 'CA' ]):
978
+ # We'll be adding peptide bonds.
979
+ if residue != chainResidues [0 ]:
980
+ hasExternal [(residue , 'N' )] = True
981
+ if residue != chainResidues [- 1 ]:
982
+ hasExternal [(residue , 'C' )] = True
983
+
861
984
# Loop over residues.
862
985
863
986
for chain in self .topology .chains ():
987
+ nucleic = any (res .name in dnaResidues or res .name in rnaResidues for res in chain .residues ())
864
988
chainResidues = list (chain .residues ())
865
989
for residue in chain .residues ():
866
- if residue .name in self .templates :
867
- template = self .templates [residue .name ]
990
+ template = self ._getTemplate (residue .name )
991
+ if template is not None :
992
+ # If an atom is marked as terminal only, and if it is bonded to any atom that has an external bond
993
+ # to another residue, we need to omit that atom and any other terminal-only atom bonded to it.
994
+
995
+ bondedTo = defaultdict (set )
996
+ for atom1 , atom2 in template .topology .bonds ():
997
+ bondedTo [atom1 ].add (atom2 )
998
+ bondedTo [atom2 ].add (atom1 )
999
+ skip = set ()
1000
+ for atom , terminal in zip (template .topology .atoms (), template .terminal ):
1001
+ if terminal :
1002
+ for atom2 in bondedTo [atom ]:
1003
+ if hasExternal [(residue , atom2 .name )]:
1004
+ skip .add (atom )
1005
+ for atom , terminal in zip (template .topology .atoms (), template .terminal ):
1006
+ if terminal :
1007
+ for atom2 in bondedTo [atom ]:
1008
+ if atom2 in skip :
1009
+ skip .add (atom )
868
1010
atomNames = set (atom .name for atom in residue .atoms ())
869
- templateAtoms = list ( template .topology .atoms ())
870
- if residue == chainResidues [0 ] and (chain .index , 0 ) not in self .missingResidues :
1011
+ templateAtoms = [ atom for atom in template .topology .atoms () if atom not in skip ]
1012
+ if nucleic and residue == chainResidues [0 ] and (chain .index , 0 ) not in self .missingResidues :
871
1013
templateAtoms = [atom for atom in templateAtoms if atom .name not in ('P' , 'OP1' , 'OP2' )]
872
1014
873
1015
# Add atoms from the template that are missing.
874
1016
875
1017
missing = []
876
1018
for atom in templateAtoms :
877
- if atom .name not in atomNames :
1019
+ if atom .name not in atomNames and atom . element != app . element . hydrogen :
878
1020
missing .append (atom )
879
1021
if len (missing ) > 0 :
880
1022
missingAtoms [residue ] = missing
@@ -1118,9 +1260,22 @@ def _downloadNonstandardDefinitions(self):
1118
1260
1119
1261
def _describeVariant (self , residue , definitions ):
1120
1262
"""Build the variant description to pass to addHydrogens() for a residue."""
1121
- if residue .name not in definitions :
1263
+ if residue .name not in app .PDBFile ._standardResidues and self ._getTemplate (residue .name ) is not None :
1264
+ # The user has registered a template for this residue. Use the hydrogens from it.
1265
+ template = self ._getTemplate (residue .name )
1266
+ atoms = [(atom .name , atom .element .symbol .upper (), terminal ) for atom , terminal in zip (template .topology .atoms (), template .terminal )]
1267
+ resAtoms = dict ((atom .name , atom ) for atom in residue .atoms ())
1268
+ bonds = []
1269
+ for atom1 , atom2 in template .topology .bonds ():
1270
+ if atom1 .element == app .element .hydrogen and atom2 .name in resAtoms :
1271
+ bonds .append ((atom1 .name , atom2 .name ))
1272
+ elif atom2 .element == app .element .hydrogen and atom1 .name in resAtoms :
1273
+ bonds .append ((atom2 .name , atom1 .name ))
1274
+ elif residue .name in definitions :
1275
+ # We downloaded a definition.
1276
+ atoms , bonds = definitions [residue .name ]
1277
+ else :
1122
1278
return None
1123
- atoms , bonds = definitions [residue .name ]
1124
1279
1125
1280
# See if the heavy atoms are identical.
1126
1281
0 commit comments