From 22f6e132e0692da255453e5ad5087bdd59bdb75f Mon Sep 17 00:00:00 2001
From: Gordon Getzinger <16693333+gjgetzinger@users.noreply.github.com>
Date: Thu, 16 Apr 2020 14:14:42 -0400
Subject: [PATCH] Defines `super_parent_syn`

During compound standardization/registration, it can be helpful to
preserve molecule identifiers for mapping synonyms or related molecules.
This function mirrors the `super_parent` function, but captures InChI
identifiers at each sanitization step and assigns them as properties of
the sanitized molecule (either InChI or InChI Key).

```python
smi = '[13CH3][C@@H](C)C/C=C([O-])\O.[Na+]'
mol = Chem.MolFromSmiles(smi)
mol_san = super_parent_syn(mol, 'inchikey')
mol_san.GetPropsAsDict()
```

```
{'fragment_inchikey': 'GCBSTXJCFNXFGB-DQIXCRJTSA-M',
 'charge_inchikey': 'VBJZLOGJBQCFRJ-BBAZYXFWSA-M',
 'isotope_inchikey': 'VBJZLOGJBQCFRJ-BBAZYXFWSA-N',
 'stereo_inchikey': 'VBJZLOGJBQCFRJ-UHFFFAOYSA-N',
 'tautomer_inchikey': 'VBJZLOGJBQCFRJ-UHFFFAOYSA-N',
 'standardize_inchikey': 'FGKJLKRYENPLQH-UHFFFAOYSA-N',
 'inchikey': 'FGKJLKRYENPLQH-UHFFFAOYSA-N'}
```
---
 molvs/standardize.py | 46 +++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 45 insertions(+), 1 deletion(-)

diff --git a/molvs/standardize.py b/molvs/standardize.py
index ca3f585..e3ae588 100644
--- a/molvs/standardize.py
+++ b/molvs/standardize.py
@@ -211,7 +211,51 @@ def super_parent(self, mol, skip_standardize=False):
         mol = self.tautomer_parent(mol, skip_standardize=True)
         mol = self.standardize(mol)
         return mol
-        
+
+    def super_parent_syn(self, mol, synonyms_as='inchikey', skip_standardize=False):
+        """Return the super parent of a given molecule with InChI(Key)s preserved as properties.
+
+        Mirrors :meth:`super_parent`, additionally recording the InChI of the molecule as it enters each
+        step and of the final result. Each identifier is stored on the returned molecule as a string
+        property named after the step about to be applied: ``fragment_inchi`` (before the fragment
+        parent), ``charge_inchi``, ``isotope_inchi``, ``stereo_inchi``, ``tautomer_inchi``,
+        ``standardize_inchi`` (before the final standardization) and ``inchi`` (the super parent itself).
+
+        :param mol: The input molecule.
+        :type mol: rdkit.Chem.rdchem.Mol
+        :param str synonyms_as: ``'inchi'`` stores InChI strings under the names above; any other value
+                                stores InChIKeys under the same names with ``key`` appended.
+        :param bool skip_standardize: Set to True if mol has already been standardized.
+        :returns: The super parent molecule, with the identifiers set as properties.
+        :rtype: rdkit.Chem.rdchem.Mol
+        """
+        if not skip_standardize:
+            mol = self.standardize(mol)
+
+        parent_steps = (
+            ('fragment_inchi', self.fragment_parent),
+            ('charge_inchi', self.charge_parent),
+            ('isotope_inchi', self.isotope_parent),
+            ('stereo_inchi', self.stereo_parent),
+            ('tautomer_inchi', self.tautomer_parent),
+        )
+        inchi = {}
+        for key, parent in parent_steps:
+            # Capture the identifier before the step runs, so it names the step's input.
+            inchi[key] = Chem.MolToInchi(mol)
+            mol = parent(mol, skip_standardize=True)
+        inchi['standardize_inchi'] = Chem.MolToInchi(mol)
+        mol = self.standardize(mol)
+        inchi['inchi'] = Chem.MolToInchi(mol)
+
+        for key, value in inchi.items():
+            if synonyms_as == 'inchi':
+                mol.SetProp(key, value)
+            else:
+                mol.SetProp(key + 'key', Chem.InchiToInchiKey(value))
+
+        return mol
+
     def standardize_with_parents(self, mol):
         """"""
         standardized = self.standardize(mol)