Skip to content

Commit 5cf78a2

Browse files
authored Jul 10, 2024
Can add hydrogens to nonstandard residues (#295)
* Can add hydrogens to nonstandard residues
* Build against OpenMM dev version
* Fixed typo
* Updated Python versions for CI
* Added required quotes
1 parent 268f1d7 commit 5cf78a2

File tree

5 files changed

+1647
-8
lines changed

5 files changed

+1647
-8
lines changed
 

‎.github/workflows/CI.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ jobs:
1313
runs-on: ubuntu-latest
1414
strategy:
1515
matrix:
16-
python-version: [3.7, 3.8, 3.9]
16+
python-version: ["3.10", "3.11", "3.12"]
1717

1818
steps:
1919
- uses: actions/checkout@v2

‎devtools/environment-dev.yaml

+2-1
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,11 @@
11
name: pdbfixer-dev
22

33
channels:
4+
- conda-forge/label/openmm_dev
45
- conda-forge
56

67
dependencies:
78
- pytest
8-
- openmm
9+
- openmm=8.1.1dev0
910
- numpy
1011
- pip

‎pdbfixer/pdbfixer.py

+95-6
Original file line numberDiff line numberDiff line change
@@ -1058,10 +1058,8 @@ def addMissingHydrogens(self, pH=7.0, forcefield=None):
10581058
10591059
Notes
10601060
-----
1061-
No extensive electrostatic analysis is performed; only default residue pKas are used.
1062-
1063-
Examples
1064-
--------
1061+
No extensive electrostatic analysis is performed; only default residue pKas are used. The pH is only
1062+
taken into account for standard amino acids.
10651063
10661064
Examples
10671065
--------
@@ -1070,13 +1068,104 @@ def addMissingHydrogens(self, pH=7.0, forcefield=None):
10701068
10711069
>>> fixer = PDBFixer(pdbid='1VII')
10721070
>>> fixer.addMissingHydrogens(pH=8.0)
1073-
10741071
"""
1072+
extraDefinitions = self._downloadNonstandardDefinitions()
1073+
variants = [self._describeVariant(res, extraDefinitions) for res in self.topology.residues()]
10751074
modeller = app.Modeller(self.topology, self.positions)
1076-
modeller.addHydrogens(pH=pH, forcefield=forcefield)
1075+
modeller.addHydrogens(pH=pH, forcefield=forcefield, variants=variants)
10771076
self.topology = modeller.topology
10781077
self.positions = modeller.positions
10791078

1079+
def _downloadNonstandardDefinitions(self):
    """Download definitions for any nonstandard residues in the topology.

    Every residue name in the topology that is missing from
    ``app.Modeller._residueHydrogens`` is looked up in the RCSB ligand
    dictionary.  The lookup is best effort: a residue whose definition cannot
    be downloaded, or whose file has no ``chem_comp_atom`` category, is
    skipped and simply gets no entry in the result.

    Returns
    -------
    dict
        Maps residue name to a tuple ``(atoms, bonds)``.  ``atoms`` is a list
        of ``(atom name, upper-cased element symbol, is leaving atom)`` tuples
        and ``bonds`` is a list of ``(atom name 1, atom name 2)`` tuples.
    """
    # Imported once here rather than on every loop iteration.
    from openmm.app.internal.pdbx.reader.PdbxReader import PdbxReader

    app.Modeller._loadStandardHydrogenDefinitions()
    resnames = {residue.name for residue in self.topology.residues()}
    definitions = {}
    for name in resnames:
        if name in app.Modeller._residueHydrogens:
            continue

        # Try to download the definition.  The context manager guarantees the
        # connection is closed even if reading or decoding fails.
        try:
            with urlopen(f'https://files.rcsb.org/ligands/download/{name}.cif') as file:
                contents = file.read().decode('utf-8')
        except Exception:
            # Deliberately best effort: any download problem just means this
            # residue gets no definition.  Unlike a bare ``except``, this lets
            # KeyboardInterrupt and SystemExit propagate.
            continue

        # Record the atoms and bonds.
        reader = PdbxReader(StringIO(contents))
        data = []
        reader.read(data)
        if not data:
            # Nothing was parsed, so there is no definition to record.
            continue
        block = data[0]
        atomData = block.getObj('chem_comp_atom')
        if atomData is None:
            # Without an atom table the definition is unusable.
            continue
        atomNameCol = atomData.getAttributeIndex('atom_id')
        symbolCol = atomData.getAttributeIndex('type_symbol')
        leavingCol = atomData.getAttributeIndex('pdbx_leaving_atom_flag')
        atoms = [
            (row[atomNameCol], row[symbolCol].upper(), row[leavingCol] == 'Y')
            for row in atomData.getRowList()
        ]
        bondData = block.getObj('chem_comp_bond')
        if bondData is None:
            bonds = []
        else:
            atom1Col = bondData.getAttributeIndex('atom_id_1')
            atom2Col = bondData.getAttributeIndex('atom_id_2')
            bonds = [(row[atom1Col], row[atom2Col]) for row in bondData.getRowList()]
        definitions[name] = (atoms, bonds)
    return definitions
1118+
1119+
def _describeVariant(self, residue, definitions):
    """Build the variant description to pass to addHydrogens() for a residue.

    Parameters
    ----------
    residue
        The topology residue to describe.
    definitions : dict
        Maps residue name to ``(atoms, bonds)``, where each atom is a tuple
        ``(name, element symbol, is leaving atom)`` and each bond is a tuple
        of two atom names.

    Returns
    -------
    list or None
        A list of ``(hydrogen name, parent atom name)`` tuples, or None if
        there is no definition for the residue or its heavy atoms do not
        match the definition.
    """
    try:
        atomRecords, bondRecords = definitions[residue.name]
    except KeyError:
        return None

    # The heavy atoms of the residue must agree with those of the definition.
    presentHeavy = {
        atom.name: atom
        for atom in residue.atoms()
        if atom.element is not None and atom.element != app.element.hydrogen
    }
    expectedHeavy = {
        record[0]: record
        for record in atomRecords
        if record[1] not in ('', 'H')
    }
    for atomName, atom in presentHeavy.items():
        if atomName not in expectedHeavy:
            # The definition has no such atom.
            return None
        if expectedHeavy[atomName][1] != atom.element.symbol.upper():
            # Same name, but a different element.
            return None
    for atomName, record in expectedHeavy.items():
        isLeaving = record[2]
        if not isLeaving and atomName not in presentHeavy:
            # Only leaving atoms are allowed to be absent from the topology.
            return None

    recordsByName = {record[0]: record for record in atomRecords}
    residueBonds = list(residue.bonds())

    def hasExternalBond(target):
        """Return whether target is bonded to an atom of another residue."""
        return any(
            (first == target and second.residue != residue)
            or (second == target and first.residue != residue)
            for first, second in residueBonds
        )

    # Collect every hydrogen that should be present, along with its parent.
    hydrogens = []
    for first, second in bondRecords:
        if recordsByName[first][1] == 'H':
            hydrogenName, parentName = first, second
        elif recordsByName[second][1] == 'H':
            hydrogenName, parentName = second, first
        else:
            continue
        if recordsByName[hydrogenName][2]:
            # A leaving hydrogen is dropped when its parent is missing or when
            # the parent is bonded to another residue.
            if parentName not in presentHeavy:
                continue
            if hasExternalBond(presentHeavy[parentName]):
                continue
        hydrogens.append((hydrogenName, parentName))
    return hydrogens
1168+
10801169
def addSolvent(self, boxSize=None, padding=None, boxVectors=None, positiveIon='Na+', negativeIon='Cl-', ionicStrength=0*unit.molar, boxShape='cube'):
10811170
"""Add a solvent box surrounding the structure.
10821171

‎pdbfixer/tests/data/1BHL.pdb

+1,523
Large diffs are not rendered by default.

‎pdbfixer/tests/test_add_hydrogens.py

+26
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
import pdbfixer
2+
from pathlib import Path
3+
from io import StringIO
4+
5+
def test_nonstandard():
    """Check the hydrogen counts on the nonstandard residues of 4JSV.

    After the first three chains are removed and hydrogens are added, every
    ADP residue must carry 15 hydrogens and every MG or MGF residue none.
    """
    pdbPath = Path(__file__).parent / "data" / "4JSV.pdb"
    fixer = pdbfixer.PDBFixer(pdbfile=StringIO(pdbPath.read_text()))
    fixer.removeChains(chainIndices=[0, 1, 2])
    fixer.addMissingHydrogens()
    for residue in fixer.topology.residues():
        hydrogens = [atom for atom in residue.atoms() if atom.element.symbol == 'H']
        if residue.name == 'ADP':
            assert len(hydrogens) == 15
        if residue.name in ('MG', 'MGF'):
            assert len(hydrogens) == 0
17+
18+
def test_leaving_atoms():
    """Check the hydrogen count on a nonstandard residue with leaving atoms.

    After hydrogens are added to 1BHL, every CAS residue must carry 10
    hydrogens.
    """
    pdbPath = Path(__file__).parent / "data" / "1BHL.pdb"
    fixer = pdbfixer.PDBFixer(pdbfile=StringIO(pdbPath.read_text()))
    fixer.addMissingHydrogens()
    for residue in fixer.topology.residues():
        hydrogens = [atom for atom in residue.atoms() if atom.element.symbol == 'H']
        if residue.name == 'CAS':
            assert len(hydrogens) == 10

0 commit comments

Comments
 (0)
Please sign in to comment.