Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions src/peppr/match.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
from rdkit.Chem import (
AssignStereochemistryFrom3D,
BondType,
GetMolFrags,
Mol,
SanitizeFlags,
SanitizeMol,
Expand Down Expand Up @@ -1464,4 +1465,9 @@ def _to_mol(molecule: struc.AtomArray) -> Mol:
BondType.DOUBLE,
]:
bond.SetBondType(BondType.ONEANDAHALF)
frags = GetMolFrags(mol)
if len(frags) != 1:
raise struc.BadStructureError(
f"Molecule contains multiple disconnected fragments: {len(frags)}"
)
return mol
52 changes: 40 additions & 12 deletions src/peppr/sanitize.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

__all__ = ["sanitize"]

from itertools import product
import rdkit
import rdkit.Chem.AllChem as Chem

Expand All @@ -24,7 +25,7 @@ def sanitize(mol: Chem.Mol, max_fix_iterations: int = 1000) -> None:
-----
Deals with cases:

- ``AtomValenceException``: add charge if total valence is exceeding default (for N, O atoms only)
- ``AtomValenceException``: add charge if total valence is exceeding default (for B, N, O atoms only)
- ``KekulizeException``: for ring N atoms with unspecified protonation
"""
mol.UpdatePropertyCache(strict=False)
Expand Down Expand Up @@ -59,7 +60,7 @@ def sanitize(mol: Chem.Mol, max_fix_iterations: int = 1000) -> None:

def _fix_problem(mol: Chem.Mol, problem: Exception) -> None:
"""
Apply fixes for common problems.
Apply fixes for common input problems with RDKit SanitizeMol.

Parameters
----------
Expand Down Expand Up @@ -109,17 +110,37 @@ def _fix_problem(mol: Chem.Mol, problem: Exception) -> None:
# currently not implemented
return
if problem.GetType() == "KekulizeException":
# hack: only works for nitrogens with missing explicit Hs
# hack: fix KekulizeException related to ring N atoms with unspecified
# protonation by setting one or more ring nitrogens as "[nH]"
possible_nHs = []
original_nH_state = []
for atidx in problem.GetAtomIndices():
at = mol.GetAtomWithIdx(atidx)
# set one of the nitrogens with two bonds in a ring system as "[nH]"
if (
at.GetSymbol() == "N"
and at.GetDegree() == 2
and at.GetTotalNumHs() == 0
):
at.SetNumExplicitHs(1)
break
if at.GetSymbol() == "N" and at.GetDegree() == 2:
possible_nHs.append(at)
original_nH_state.append(at.GetNumExplicitHs())

if not len(possible_nHs):
# no candidate nitrogens to fix the problem
# note this early return is unnecessary as the loops below will
# not be entered, but it makes the intention clearer
return

# if there is one or more candidates - iterate through all the possible
# combinations of explicit Hs for the candidate nitrogens
for combo in product([0, 1], repeat=len(possible_nHs)):
for nH, nH_explicit in zip(possible_nHs, combo):
nH.SetNumExplicitHs(nH_explicit)
# and check if the problem is resolved
check_problems = Chem.DetectChemistryProblems(
mol, sanitizeOps=Chem.rdmolops.SanitizeFlags.SANITIZE_KEKULIZE
)
if not [p for p in check_problems if _is_same_problem(p, problem)]:
# problem is resolved - no need to try other combinations
return
# if all combos failed - reset explicit Hs back to the original values
for nH, nH_explicit in zip(possible_nHs, original_nH_state):
nH.SetNumExplicitHs(nH_explicit)


def _is_same_problem(problem1: Exception, problem2: Exception) -> bool:
Expand All @@ -143,6 +164,13 @@ def _is_same_problem(problem1: Exception, problem2: Exception) -> bool:
if problem1.GetAtomIdx() != problem2.GetAtomIdx():
return False
elif problem_type == "KekulizeException":
if problem1.GetAtomIndices() != problem2.GetAtomIndices():
if (
len(
set(problem1.GetAtomIndices()).intersection(
set(problem2.GetAtomIndices())
)
)
== 0
):
return False
return True
45 changes: 41 additions & 4 deletions tests/test_charge.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,10 +90,6 @@ def test_estimate_formal_charges_from_smiles(smiles, ph, ref_charges):
# imine example
("MFG", 7.4, {}),
("MFG", 3, {"N10": 1}),
# tetrazole and imidazole motifs
("3GK", 7.4, {"N1": -1}),
("3GK", 5, {"N1": -1, "N21": 1}),
("3GK", 4, {"N21": 1}),
# thiophenol example
("JKE", 7.4, {"SD": -1, "OD2": -1}),
("JKE", 6, {"OD2": -1}),
Expand Down Expand Up @@ -133,6 +129,47 @@ def test_estimate_formal_charges(comp_name, ph, ref_charged_atoms, include_hydro
assert test_charged_atoms == ref_charged_atoms


@pytest.mark.parametrize(
["comp_name", "ph", "acceptatble_charge_combos"],
[
# tetrazole and imidazole motifs
("3GK", 7.4, [{"N1": -1}, {"N5": -1}]),
(
"3GK",
5,
[
{"N1": -1, "N21": 1},
{"N1": -1, "N23": 1},
{"N5": -1, "N21": 1},
{"N5": -1, "N23": 1},
],
),
("3GK", 4, [{"N21": 1}, {"N23": 1}]),
],
)
def test_estimate_formal_charges_heterocycles_nohydrogens(
comp_name, ph, acceptatble_charge_combos
):
"""
Check if formal charges are estimated correctly for more complex small molecules.
Especially when protonation is not specified and several options are possible.
"""
atoms = info.residue(comp_name)
# Do not rely on charges extracted from the CCD
atoms.del_annotation("charge")
# remove hydrogens to check if the method can estimate charges without relying on them
atoms = atoms[atoms.element != "H"]

charges = peppr.estimate_formal_charges(atoms, ph)
test_charged_atoms = {
atom_name: charge
for atom_name, charge in zip(atoms.atom_name, charges)
if charge != 0
}

assert test_charged_atoms in acceptatble_charge_combos


def test_estimate_formal_charges_peptide():
"""
Check if a peptide has the expected charges at the termini and the acidic/basic
Expand Down
22 changes: 13 additions & 9 deletions tests/test_contacts.py
Original file line number Diff line number Diff line change
Expand Up @@ -212,7 +212,13 @@ def test_salt_bridge_identification(
contact_measurement = peppr.ContactMeasurement(receptor, ligand, cutoff=4.0, ph=6.0)
test_contacts = contact_measurement.find_salt_bridges(use_resonance=use_resonance)

assert len(test_contacts) == len(expected_contacts)
if use_resonance:
# the expected use should recover all expected contacts
assert len(test_contacts) == len(expected_contacts)
else:
# If resonance is not considered, some contacts may be missed
assert len(test_contacts) <= len(expected_contacts)

for i, j in test_contacts:
contact = (
receptor.res_id[i].item(),
Expand Down Expand Up @@ -316,23 +322,21 @@ def test_find_resonance_charges():
ligand.set_annotation("charge", peppr.estimate_formal_charges(ligand, 7.4))
# create rdkit ligand object
ligand_mol = rdkit_interface.to_mol(ligand)
try:
peppr.sanitize(ligand_mol)
except Exception:
return np.nan
ligand_charged_atoms = np.where(ligand.charge != 0)[0]
assert np.equal(ligand_charged_atoms, [0, 7, 8]).all()
peppr.sanitize(ligand_mol)

charged_atoms = np.where(ligand.charge != 0)[0]
assert np.equal(charged_atoms, [0, 7, 8]).all()

# get charged atoms and their resonance groups
pos_mask, neg_mask, ligand_conjugated_groups = peppr.find_resonance_charges(
ligand_mol
)
assert len(set(ligand_conjugated_groups)) < len(ligand_conjugated_groups), (
"Number of groups expected less than number of atoms as some are conjugated"
"Number of groups should be less than number of atoms as some are conjugated"
)
charged_atom_mask = pos_mask | neg_mask
ligand_charged_in_resonance_atoms = np.where(charged_atom_mask)[0]
assert set(ligand_charged_atoms) != set(ligand_charged_in_resonance_atoms), (
assert set(charged_atoms) != set(ligand_charged_in_resonance_atoms), (
"Charged atoms do not match those found in resonance structures"
)
assert np.equal(ligand_charged_in_resonance_atoms, [0, 3, 6, 7, 8]).all()
39 changes: 37 additions & 2 deletions tests/test_match.py
Original file line number Diff line number Diff line change
Expand Up @@ -483,8 +483,16 @@ def test_unmatchable_molecules(use_heuristic):
reference = reference[reference.element != "H"]
_annotate_atom_order(reference)
pose = reference.copy()
# Remove bonds in one structure to make the bond graphs incompatible
pose.bonds = struc.BondList(pose.array_length())
bond_list = pose.bonds
bond_array = bond_list.as_array().copy()

# shuffle the bond types to create an incompatible bond graph
bond_array[:, 2] = np.random.permutation(bond_array[:, 2])
pose.bonds = struc.BondList(bond_list.get_atom_count(), bond_array)
_annotate_atom_order(pose)
assert not np.all(
reference.bonds == pose.bonds
) # Sanity check to make sure the bond lists are different

with pytest.warns(peppr.GraphMatchWarning, match="Incompatible bond graph"):
matched_reference, matched_pose = peppr.find_optimal_match(
Expand Down Expand Up @@ -628,3 +636,30 @@ def _check_match(
matched_reference.atom_id
)
assert len(np.unique(matched_pose.atom_id)) == len(matched_pose.atom_id)


@pytest.mark.parametrize("disconnected", [True, False])
def test__to_mol_disconnected(disconnected: bool) -> None:
"""
Test that disconnected mol in rdKit is raising exception

We are creating a molecule with just appending a ligand to another
if `disconnected` is True

Parameters
----------
disconnected: bool
Whether test a case of disconnected mol
"""
ccd = "BTN"
mol_atom_array = struc.info.residue(ccd)
if disconnected:
mol_atom_array += struc.info.residue(ccd)
with pytest.raises(
struc.BadStructureError,
match="Molecule contains multiple disconnected fragments",
):
mol = peppr.match._to_mol(mol_atom_array)
else:
mol = peppr.match._to_mol(mol_atom_array)
assert mol.GetNumAtoms() == len(mol_atom_array), f"Failed for {ccd}"
1 change: 1 addition & 0 deletions tests/test_sanitize.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
"smiles",
[
"c1ccc2nc3c(c(=O)c2c1)cccc3",
"O=c1c(cccc2)c2c(Cc3ccccc3)nn1",
"n1cccc1",
"CN(C)(C)C",
"C1CCCC=O1",
Expand Down