diff --git a/.github/workflows/conda.yml b/.github/workflows/conda.yml index 37ecd41a..b8704a22 100644 --- a/.github/workflows/conda.yml +++ b/.github/workflows/conda.yml @@ -9,8 +9,10 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - os: [ubuntu-latest, macOS-latest] - python-version: [3.8, 3.9] + os: [ubuntu-latest, ] + python-version: [3.9, ] + #os: [ubuntu-latest, macOS-latest] No need to make multiple releases anymore + #python-version: [3.8, 3.9] No need to make multiple releases anymore name: Python ${{ matrix.python-version }} at ${{ matrix.os }} steps: - uses: actions/checkout@v4 diff --git a/docs/releasehistory.rst b/docs/releasehistory.rst index 62e1480a..3b4c2ce8 100644 --- a/docs/releasehistory.rst +++ b/docs/releasehistory.rst @@ -8,14 +8,28 @@ Releases follow the ``major.minor.micro`` scheme recommended by `PEP440 `_: fo not uniquify substructures in alchemical alignment +- `PR #184 `_: do not uniquify substructures in alchemical alignment - `PR #184 `_: apply 1-4 terms exclusion in alchemical topologies - `PR #184 `_: replace topology propers and impropers - `PR #184 `_: make dihedral's hash hashable diff --git a/peleffy/main.py b/peleffy/main.py index 2bdcb03c..5cea1db7 100644 --- a/peleffy/main.py +++ b/peleffy/main.py @@ -75,11 +75,18 @@ def parse_args(args): parser.add_argument("--chain", metavar="CHAIN", type=str, help="Chain of the molecule to parameterize", default=None) + parser.add_argument("--residue_id", metavar="RESIDUE_ID", + type=int, help="Residue ID of the molecule to " + + "parameterize", default=None) parser.add_argument('--include_terminal_rotamers', dest="include_terminal_rotamers", action='store_true', help="Not exclude terminal rotamers " + "when building the rotamer library") + parser.add_argument('--allow_undefined_stereo', + dest="allow_undefined_stereo", + action='store_true', + help="Allow undefined stereochemistry") parser.add_argument('-s', '--silent', dest="silent", action='store_true', @@ -100,6 +107,7 @@ def parse_args(args): parser.set_defaults(as_datalocal=False) parser.set_defaults(with_solvent=False) parser.set_defaults(include_terminal_rotamers=False) + parser.set_defaults(include_terminal_rotamers=False) parser.set_defaults(silent=False) parser.set_defaults(debug=False) parser.set_defaults(for_amber=False) @@ -138,7 +146,9 @@ def run_peleffy(pdb_file, charge_method=DEFAULT_CHARGE_METHOD, charges_from_file=None, chain=None, + residue_id=None, exclude_terminal_rotamers=True, + allow_undefined_stereo=False, output=None, with_solvent=False, as_datalocal=False, conformation_path=None, for_amber=False): @@ -160,9 +170,16 @@ def run_peleffy(pdb_file, The file containing the partial charges to assign to the molecule. Default is None chain : str - Chain to the molecule if the PDB contains multiple molecules. + Chain to the molecule if the PDB contains multiple molecules + residue_id : int + Residue ID of the molecule to parameterize. If other residues are + present in the PDB and a covalent bond is detected between the + selected residue and another residue, a covalent template will be + generated exclude_terminal_rotamers : bool Whether to exclude terminal rotamers or not + allow_undefined_stereo : bool + Whether to allow undefined stereochemistry or not output : str Path where output files will be saved with_solvent : bool @@ -195,12 +212,17 @@ def run_peleffy(pdb_file, log.info('-' * 60) log.info(' - General:') log.info(' - Input PDB:', pdb_file) + if chain is not None: + log.info(' - Chain:', chain) + if residue_id is not None: + log.info(' - Residue ID:', residue_id) log.info(' - Output path:', output) log.info(' - Write solvent parameters:', with_solvent) log.info(' - DataLocal-like output:', as_datalocal) log.info(' - Parameterization:') log.info(' - Force field:', forcefield_name) log.info(' - Charge method:', charge_method_str) + log.info(' - Allow undefined stereo:', allow_undefined_stereo) log.info(' - For AMBER:', for_amber) log.info(' - Rotamer library:') log.info(' - Resolution:', resolution) @@ -224,14 +246,37 @@ def run_peleffy(pdb_file, molecules = PDBreader.get_molecules_from_chain( selected_chain=chain, rotamer_resolution=resolution, - exclude_terminal_rotamers=exclude_terminal_rotamers) + exclude_terminal_rotamers=exclude_terminal_rotamers, + allow_undefined_stereo=allow_undefined_stereo) - if PDBreader.is_unique(molecules): - molecule, = molecules - else: - raise ValueError('The selected chain must only contain a single ' - + 'molecule to be parameterized.') + if PDBreader.is_unique(molecules): + molecule, = molecules + elif residue_id is not None: + molecule = PDBreader.get_molecule_from_residue_id( + molecules, + selected_residue_id=residue_id, + rotamer_resolution=resolution, + exclude_terminal_rotamers=exclude_terminal_rotamers, + allow_undefined_stereo=allow_undefined_stereo) + + covalent_molecules = PDBreader.get_covalent_molecules_to_residue_id( + molecule, + molecules) + + if len(covalent_molecules) > 0: + print(' - Covalent bond detected between the selected molecule ' + + 'and the following residue(s):') + for covalent_molecule in covalent_molecules: + print(' - {}'.format(covalent_molecule.tag)) + + # Prepare molecule + molecule = Molecule.from_molecule_bonded_to_residues(molecule, + covalent_molecules) + else: + raise ValueError('The selected chain must only contain a single ' + + 'molecule to be parameterized or a residue_id ' + + 'must be specified.') else: if PDBreader.is_complex: raise ValueError('To parameterize a ligand from a protein-ligand ' @@ -239,7 +284,8 @@ def run_peleffy(pdb_file, else: molecule = Molecule( pdb_file, rotamer_resolution=resolution, - exclude_terminal_rotamers=exclude_terminal_rotamers) + exclude_terminal_rotamers=exclude_terminal_rotamers, + allow_undefined_stereo=allow_undefined_stereo) # Initialize force field ff_selector = ForceFieldSelector() @@ -298,7 +344,7 @@ def run_peleffy(pdb_file, log.info('-' * 60) -def main(args): +def main(args=None): """ It reads the command-line arguments and runs peleffy. @@ -316,6 +362,9 @@ def main(args): -r 30 -o output_path/ --with_solvent --as_datalocal -c gasteiger """ + if args is None: + import sys + args = parse_args(sys.argv[1:]) exclude_terminal_rotamers = not args.include_terminal_rotamers @@ -337,16 +386,16 @@ def main(args): resolution=args.resolution, charge_method=args.charge_method, exclude_terminal_rotamers=exclude_terminal_rotamers, + allow_undefined_stereo=args.allow_undefined_stereo, output=args.output, with_solvent=args.with_solvent, as_datalocal=args.as_datalocal, chain=args.chain, + residue_id=args.residue_id, conformation_path=args.conformations_info_path, charges_from_file=args.charges_from_file, for_amber=args.for_amber) if __name__ == '__main__': - import sys - args = parse_args(sys.argv[1:]) - main(args) + main() diff --git a/peleffy/topology/molecule.py b/peleffy/topology/molecule.py index c1391569..2d482220 100644 --- a/peleffy/topology/molecule.py +++ b/peleffy/topology/molecule.py @@ -20,7 +20,7 @@ def __init__(self, path=None, smiles=None, pdb_block=None, rotamer_resolution=30,exclude_terminal_rotamers=True, name='', tag='UNK', connectivity_template=None, core_constraints=[], allow_undefined_stereo=False, hydrogens_are_explicit=True, - fix_pdb=True): + fix_pdb=True, residue_id=None): """ It initializes a Molecule object through a PDB file or a SMILES tag. @@ -60,6 +60,10 @@ def __init__(self, path=None, smiles=None, pdb_block=None, fix_pdb : bool Activates or deactivate the PDB fixer that is executed prior parsing it + residue_id : int + The residue number of the molecule in the PDB file when + multiple residues are present when parameterizing + covanlently bonded molecules. Default is None Examples -------- @@ -144,6 +148,7 @@ def __init__(self, path=None, smiles=None, pdb_block=None, self._allow_undefined_stereo = allow_undefined_stereo self._hydrogens_are_explicit = hydrogens_are_explicit self._fix_pdb = fix_pdb + self._residue_id = residue_id # Deactivate OpenForceField toolkit warnings import logging @@ -534,7 +539,8 @@ def from_rdkit(rdkit_molecule, rotamer_resolution=30, exclude_terminal_rotamers=True, name='', tag='UNK', connectivity_template=None, core_constraints=[], allow_undefined_stereo=False, - hydrogens_are_explicit=True): + hydrogens_are_explicit=True, + residue_id=None): """ It initializes and returns a peleffy Molecule representation from an RDKit molecular representation. @@ -570,7 +576,11 @@ def from_rdkit(rdkit_molecule, rotamer_resolution=30, Whether the input molecule has explicit information about hydrogen atoms or not. Otherwise, they will be added when the molecule is built. Default is True - + residue_id : int + The residue number of the molecule in the PDB file when + multiple residues are present when parameterizing + covanlently bonded molecules. Default is None + Returns ------- molecule : an peleffy.topology.Molecule @@ -591,13 +601,14 @@ def from_rdkit(rdkit_molecule, rotamer_resolution=30, """ molecule = Molecule( - rotamer_resolution=30, + rotamer_resolution=rotamer_resolution, exclude_terminal_rotamers=exclude_terminal_rotamers, name=name, tag=tag, connectivity_template=connectivity_template, core_constraints=core_constraints, allow_undefined_stereo=allow_undefined_stereo, - hydrogens_are_explicit=hydrogens_are_explicit) + hydrogens_are_explicit=hydrogens_are_explicit, + residue_id=residue_id) logger = Logger() @@ -622,7 +633,8 @@ def from_openff(openff_molecule, rotamer_resolution=30, exclude_terminal_rotamers=True, name='', tag='UNK', connectivity_template=None, core_constraints=[], allow_undefined_stereo=False, - hydrogens_are_explicit=True): + hydrogens_are_explicit=True, + residue_id=None): """ It initializes and returns a peleffy Molecule representation from an OpenForceField molecular representation. @@ -658,6 +670,10 @@ def from_openff(openff_molecule, rotamer_resolution=30, Whether the input molecule has explicit information about hydrogen atoms or not. Otherwise, they will be added when the molecule is built. Default is True + residue_id : int + The residue number of the molecule in the PDB file when + multiple residues are present when parameterizing + covanlently bonded molecules. Default is None Returns ------- @@ -682,13 +698,14 @@ def from_openff(openff_molecule, rotamer_resolution=30, name = openff_molecule.name molecule = Molecule( - rotamer_resolution=30, + rotamer_resolution=rotamer_resolution, exclude_terminal_rotamers=exclude_terminal_rotamers, name=name, tag=tag, connectivity_template=connectivity_template, core_constraints=core_constraints, allow_undefined_stereo=allow_undefined_stereo, - hydrogens_are_explicit=hydrogens_are_explicit) + hydrogens_are_explicit=hydrogens_are_explicit, + residue_id=residue_id) logger = Logger() @@ -704,6 +721,152 @@ def from_openff(openff_molecule, rotamer_resolution=30, molecule._build_rotamers() return molecule + + @classmethod + def from_molecule_bonded_to_residues(cls, molecule, + bound_residues): + """ + It initializes a molecule covalently bonded to a set of + residues. + + Parameters + ---------- + molecule : an peleffy.topology.Molecule object + The molecule to be covalently bonded to the residues + bound_residues : list[peleffy.topology.Molecule] + The list of residues to be covalently bonded to the molecule + """ + from peleffy.utils.toolkits import RDKitToolkitWrapper + + # Initialize RDKit toolkit + rdkit_toolkit = RDKitToolkitWrapper() + + # Join the molecule and the residues in an RDKIT molecule + # object + whole_molecule = molecule.rdkit_molecule + for residue in bound_residues: + whole_molecule = rdkit_toolkit.combine_molecules( + whole_molecule, + residue.rdkit_molecule) + + # Get molecule residue number + molecule_residue_number = rdkit_toolkit.get_residue_id(molecule) + + # Detect the bonds between the molecule and the residues + bonded_atoms = [] + for atom in molecule.rdkit_molecule.GetAtoms(): + atom_idx = atom.GetIdx() + + whole_molecule_atom = whole_molecule.GetAtomWithIdx(atom_idx) + for bond in whole_molecule_atom.GetBonds(): + other_atom = bond.GetOtherAtom(whole_molecule_atom) + + other_atom_residue_number = other_atom.GetPDBResidueInfo().GetResidueNumber() + if other_atom_residue_number != molecule_residue_number: + bonded_atoms.append(other_atom) + + # Get more bonded atoms, up to a depth of 2 + depth = 2 + for _ in range(depth): + new_bonded_atoms = [] + for atom in bonded_atoms: + for bond in atom.GetBonds(): + other_atom = bond.GetOtherAtom(atom) + if (other_atom.GetPDBResidueInfo().GetResidueNumber() != molecule_residue_number + and other_atom not in bonded_atoms): + new_bonded_atoms.append(other_atom) + bonded_atoms += new_bonded_atoms + + # Remove all other atoms + from rdkit import Chem + new_molecule = Chem.RWMol(whole_molecule) + atom_indices_to_remove = [] + bonded_atom_idxs = [atom.GetIdx() for atom in bonded_atoms] + for atom in whole_molecule.GetAtoms(): + if atom.GetIdx() not in bonded_atom_idxs and atom.GetPDBResidueInfo().GetResidueNumber() != molecule_residue_number: + atom_indices_to_remove.append(atom.GetIdx()) + + for atom_idx in sorted(atom_indices_to_remove, reverse=True): + new_molecule.RemoveAtom(atom_idx) + + whole_molecule = new_molecule.GetMol() + + # Ensure bonded atoms are not marked as aromatic + for atom in whole_molecule.GetAtoms(): + if atom.GetPDBResidueInfo().GetResidueNumber() != molecule_residue_number: + atom.SetIsAromatic(False) + + for bond in atom.GetBonds(): + bond.SetIsAromatic(False) + + if bond.GetBondType() == Chem.BondType.AROMATIC: + bond.SetBondType(Chem.BondType.SINGLE) + + # Add missing hydrogen atoms to bonded atoms + from rdkit import Chem + Chem.SanitizeMol(whole_molecule) + atoms_with_missing_hydrogens = [] + for atom in whole_molecule.GetAtoms(): + if atom.GetPDBResidueInfo().GetResidueNumber() != molecule_residue_number: + atoms_with_missing_hydrogens.append(atom.GetIdx()) + whole_molecule = Chem.AddHs(whole_molecule, + onlyOnAtoms=atoms_with_missing_hydrogens, + addCoords=True) + + # Add PDB information to the new hydrogen atoms + pdb_atom_names = list() + for atom in whole_molecule.GetAtoms(): + if atom.GetPDBResidueInfo() is None: + atom.SetMonomerInfo(Chem.AtomPDBResidueInfo()) + + if len(atom.GetNeighbors()) == 0: + raise ValueError('Atom with no neighbors found') + + # Get information from bonded atom + bonded_atom = atom.GetNeighbors()[0] + atom.GetPDBResidueInfo().SetChainId(bonded_atom.GetPDBResidueInfo().GetChainId()) + atom.GetPDBResidueInfo().SetResidueNumber(bonded_atom.GetPDBResidueInfo().GetResidueNumber()) + atom.GetPDBResidueInfo().SetResidueName(bonded_atom.GetPDBResidueInfo().GetResidueName()) + atom.GetPDBResidueInfo().SetIsHeteroAtom(bonded_atom.GetPDBResidueInfo().GetIsHeteroAtom()) + + else: + pdb_atom_names.append(atom.GetPDBResidueInfo().GetName()) + + # Set unique PDB atom names to atoms belonging to different residues + name_index = 1 + for atom in whole_molecule.GetAtoms(): + if atom.GetPDBResidueInfo().GetResidueNumber() != molecule_residue_number: + atom_symbol = atom.GetSymbol() + tentative_name = '{:^4}'.format('{}{}'.format(atom_symbol, name_index)) + while tentative_name in pdb_atom_names: + name_index += 1 + tentative_name = '{:^4}'.format('{}{}'.format(atom_symbol, name_index)) + atom.GetPDBResidueInfo().SetName(tentative_name) + pdb_atom_names.append(tentative_name) + + # Reorder the atoms in the molecule by the residue number + residue_numbers = set() + for atom in whole_molecule.GetAtoms(): + residue_numbers.add(atom.GetPDBResidueInfo().GetResidueNumber()) + + new_order = [] + for residue_number in sorted(list(residue_numbers)): + for atom in whole_molecule.GetAtoms(): + if atom.GetPDBResidueInfo().GetResidueNumber() == residue_number: + new_order.append(atom.GetIdx()) + + whole_molecule = rdkit_toolkit.reorder_atoms(whole_molecule, new_order) + + return cls.from_rdkit(whole_molecule, + rotamer_resolution=molecule.rotamer_resolution, + exclude_terminal_rotamers=molecule.exclude_terminal_rotamers, + name=molecule.name, + tag=molecule.tag, + connectivity_template=molecule.connectivity_template, + core_constraints=molecule.core_constraints, + allow_undefined_stereo=molecule.allow_undefined_stereo, + hydrogens_are_explicit=molecule.hydrogens_are_explicit, + residue_id=molecule_residue_number) @property def rotamer_resolution(self): diff --git a/peleffy/topology/rotamer.py b/peleffy/topology/rotamer.py index ba65034c..9f323c9f 100644 --- a/peleffy/topology/rotamer.py +++ b/peleffy/topology/rotamer.py @@ -256,6 +256,11 @@ def _compute_rotamer_graph(self): + 'molecule\'s atoms' for atom, name in zip(rdkit_molecule.GetAtoms(), atom_names): + # If atom belongs to another residue, skip it + if self.molecule._residue_id is not None: + if atom.GetPDBResidueInfo().GetResidueNumber() != self.molecule._residue_id: + continue + self.add_node(atom.GetIdx(), pdb_name=name, nrot_neighbors=list()) @@ -263,6 +268,10 @@ def _compute_rotamer_graph(self): atom1 = bond.GetBeginAtomIdx() atom2 = bond.GetEndAtomIdx() + # Skip bonds that do not belong to the molecule + if atom1 not in self.nodes or atom2 not in self.nodes: + continue + if (frozenset([atom1, atom2]) in rot_bonds_atom_ids): rotatable = True else: @@ -275,6 +284,10 @@ def _compute_rotamer_graph(self): weight=int(rotatable)) for i, j in rot_bonds_atom_ids: + # If atom does not belong to the molecule, skip it + if i not in self.nodes or j not in self.nodes: + continue + self[i][j]['weight'] = 1 self.nodes[i]['rotatable'] = True self.nodes[j]['rotatable'] = True diff --git a/peleffy/topology/topology.py b/peleffy/topology/topology.py index e01d42a0..22f94864 100644 --- a/peleffy/topology/topology.py +++ b/peleffy/topology/topology.py @@ -109,6 +109,13 @@ def _build_atoms(self): for index, (atom_name, atom_type, sigma, epsilon, charge, SGB_radius, vdW_radius, gamma, alpha) \ in enumerate(self.parameters.atom_iterator): + + # Skip atom if it does not belong to the molecule + # Needed when multiple residues are present when + # parameterizing covanlently bonded molecules + if index not in self.molecule.graph.nodes: + continue + atom = Atom(index=index, PDB_name=atom_name, OPLS_type=atom_type, @@ -158,11 +165,25 @@ def _build_atoms(self): for atom in self.atoms: parent_idx = parent_idxs[atom.index] if parent_idx is not None: - atom.set_parent(self.atoms[parent_idx]) + for parent in self.atoms: + if parent.index == parent_idx: + atom.set_parent(parent) + break + else: + logger.error(['Error: parent atom not found for ' + + f'atom {atom.index} in molecule ' + + f'{self.molecule.name}']) def _build_bonds(self): """It builds the bonds of the molecule.""" for index, bond in enumerate(self.parameters['bonds']): + # Skip bond if it does not belong to the molecule + # Needed when multiple residues are present when + # parameterizing covanlently bonded molecules + if (bond['atom1_idx'] not in self.molecule.graph.nodes or + bond['atom2_idx'] not in self.molecule.graph.nodes): + continue + bond = Bond(index=index, atom1_idx=bond['atom1_idx'], atom2_idx=bond['atom2_idx'], @@ -173,6 +194,15 @@ def _build_bonds(self): def _build_angles(self): """It builds the angles of the molecule.""" for index, angle in enumerate(self.parameters['angles']): + # Skip angle if it does not belong to the molecule + # Needed when multiple residues are present when + # parameterizing covanlently bonded molecules + if (angle['atom1_idx'] not in self.molecule.graph.nodes or + angle['atom2_idx'] not in self.molecule.graph.nodes or + angle['atom3_idx'] not in self.molecule.graph.nodes): + continue + + angle = Angle(index=index, atom1_idx=angle['atom1_idx'], atom2_idx=angle['atom2_idx'], @@ -184,6 +214,15 @@ def _build_angles(self): def _build_propers(self): """It builds the propers of the molecule.""" for index, proper in enumerate(self.parameters['propers']): + # Skip proper if it does not belong to the molecule + # Needed when multiple residues are present when + # parameterizing covanlently bonded molecules + if (proper['atom1_idx'] not in self.molecule.graph.nodes or + proper['atom2_idx'] not in self.molecule.graph.nodes or + proper['atom3_idx'] not in self.molecule.graph.nodes or + proper['atom4_idx'] not in self.molecule.graph.nodes): + continue + off_proper = OFFProper(atom1_idx=proper['atom1_idx'], atom2_idx=proper['atom2_idx'], atom3_idx=proper['atom3_idx'], @@ -254,6 +293,15 @@ def _handle_excluded_propers(self): def _build_impropers(self): """It builds the impropers of the molecule.""" for index, improper in enumerate(self.parameters['impropers']): + # Skip improper if it does not belong to the molecule + # Needed when multiple residues are present when + # parameterizing covanlently bonded molecules + if (improper['atom1_idx'] not in self.molecule.graph.nodes or + improper['atom2_idx'] not in self.molecule.graph.nodes or + improper['atom3_idx'] not in self.molecule.graph.nodes or + improper['atom4_idx'] not in self.molecule.graph.nodes): + continue + off_improper = OFFImproper(atom1_idx=improper['atom1_idx'], atom2_idx=improper['atom2_idx'], atom3_idx=improper['atom3_idx'], diff --git a/peleffy/utils/input.py b/peleffy/utils/input.py index d900b65c..c7f7ad23 100644 --- a/peleffy/utils/input.py +++ b/peleffy/utils/input.py @@ -42,9 +42,10 @@ def __init__(self, path): self.pdb_content = pdb_file.readlines() def _extract_molecules_from_chain(self, chain, rotamer_resolution, - exclude_terminal_rotamers, - allow_undefined_stereo, - core_constraints): + exclude_terminal_rotamers, + allow_undefined_stereo, + core_constraints, + only_hetero=True): """ It extracts all hetero molecules found in the selected the chain of a PDB file. @@ -68,6 +69,9 @@ def _extract_molecules_from_chain(self, chain, rotamer_resolution, the core will be forced to contain them. Atoms can be specified through integers that match the atom index or strings that match with the atom PDB name + only_hetero : bool + Whether to extract only hetero molecules or not. + Default is True Returns ------- @@ -78,20 +82,20 @@ def _extract_molecules_from_chain(self, chain, rotamer_resolution, # Check if there is more than one hetero molecule in the same chain residues_ids = set([line[22:26].strip() for line in self.pdb_content - if line.startswith('HETATM') + if (line.startswith('HETATM') or line.startswith('ATOM') and only_hetero) and line[21:22] == chain and not line[17:20].strip() == 'HOH']) molecules = [] for residue_id in residues_ids: res_name = set([line[17:20].strip() for line in self.pdb_content - if line.startswith('HETATM') + if (line.startswith('HETATM') or line.startswith('ATOM') and only_hetero) and line[21:22] == chain and line[22:26].strip() == residue_id]) # Select which atoms compose this hetero molecule atom_ids = [line[6:11].strip() for line in self.pdb_content - if line.startswith('HETATM') + if (line.startswith('HETATM') or line.startswith('ATOM') and only_hetero) and line[21:22] == chain and line[22:26].strip() == residue_id] @@ -99,7 +103,8 @@ def _extract_molecules_from_chain(self, chain, rotamer_resolution, pdb_block = [] for line in self.pdb_content: - if line.startswith('HETATM') and line[6:11].strip() in atom_ids: + if ((line.startswith('HETATM') or line.startswith('ATOM') and only_hetero) + and line[6:11].strip() in atom_ids): pdb_block.append(line) if line.startswith('CONECT'): @@ -203,7 +208,7 @@ def get_molecules_from_chain(self, selected_chain, rotamer_resolution=30, Parameters ---------- selected_chain : str - Chain Id. + Chain Id rotamer_resolution : float The resolution in degrees to discretize the rotamer's conformational space. Default is 30 @@ -222,24 +227,16 @@ def get_molecules_from_chain(self, selected_chain, rotamer_resolution=30, selected chain """ - chain_ids = set([line[21:22] for line in self.pdb_content - if line.startswith('HETATM') - and not line[17:20].strip() == 'HOH']) - all_chain_ids = set([line[21:22] for line in self.pdb_content - if line.startswith('ATOM') - or line.startswith('HETATM')]) + chain_ids = set([line[21:22] for line in self.pdb_content + if line.startswith('ATOM') + or line.startswith('HETATM')]) - if not selected_chain in all_chain_ids: + if not selected_chain in chain_ids: raise ValueError('The selected chain {} '.format(selected_chain) + 'is not a valid chain for this PDB. Available ' + 'chains to select are: {}'.format(chain_ids)) - if not selected_chain in chain_ids and selected_chain in all_chain_ids: - raise ValueError('The selected chain {} '.format(selected_chain) - + 'is not an hetero molecule. Peleffy ' - + 'is only compatible with hetero atoms.') - molecules = self._extract_molecules_from_chain( chain=selected_chain, rotamer_resolution=rotamer_resolution, @@ -248,6 +245,115 @@ def get_molecules_from_chain(self, selected_chain, rotamer_resolution=30, core_constraints=core_constraints) return molecules + + def get_molecule_from_residue_id(self, molecules_in_chain, selected_residue_id, + rotamer_resolution=30, + exclude_terminal_rotamers=True, + allow_undefined_stereo=False, + core_constraints=[]): + """ + It returns all hetero molecule defined in a specific chain from the + PDB file. It handles the possible errors when selecting the chain + for a PDB. + + Parameters + ---------- + molecules_in_chain : list[peleffy.topology.Molecule] + List of peleffy's Molecule objects to search for the selected + residue id + selected_residue_id : int + Residue Id + rotamer_resolution : float + The resolution in degrees to discretize the rotamer's + conformational space. Default is 30 + exclude_terminal_rotamers : bool + Whether to exclude terminal rotamers when generating the + rotamers library or not + allow_undefined_stereo : bool + Whether to allow a molecule with undefined stereochemistry + to be defined or try to assign the stereochemistry and + raise a complaint if not possible. Default is False + + Returns + ------- + molecule : peleffy.topology.Molecule + The peleffy's Molecule object corresponding to the + selected residue id + """ + from peleffy.utils.toolkits import RDKitToolkitWrapper + + # Initialize RDKit toolkit + rdkit = RDKitToolkitWrapper() + + # Get selected chain + selected_chain = rdkit.get_chain_id(molecules_in_chain[0]) + + if selected_chain is None: + raise ValueError('The selected chain is undefined') + + residue_ids = set([line[22:26].strip() for line in self.pdb_content + if line.startswith('ATOM') or line.startswith('HETATM') + and line[21:22] == selected_chain + and not line[17:20].strip() == 'HOH']) + + if len(residue_ids) == 0: + raise ValueError('The selected chain {} '.format(selected_chain) + + 'does not contain any residues') + + for molecule, residue_id in zip(molecules_in_chain, residue_ids): + if residue_id == str(selected_residue_id): + return molecule + + raise ValueError('The selected chain {} '.format(selected_chain) + + 'does not contain a residue with id ' + + '{}'.format(selected_residue_id)) + + def get_covalent_molecules_to_residue_id(self, molecule, target_molecules): + """ + It returns molecules defined in the input list that are + covalently bonded to the selected molecule. + + Parameters + ---------- + molecule : peleffy.topology.Molecule + The peleffy's Molecule object corresponding to the selected + residue id + target_molecules : list[peleffy.topology.Molecule] + List of peleffy's Molecule objects to search for the selected + residue id + + Returns + ------- + covalent_molecules : list[peleffy.topology.Molecule] + The list of peleffy's Molecule objects corresponding to the + selected residue id + """ + import numpy as np + from peleffy.utils.toolkits import RDKitToolkitWrapper + + # Initialize RDKit toolkit + rdkit = RDKitToolkitWrapper() + + # Extract coordinates of the selected residue + molecule_coordinates = rdkit.get_coordinates(molecule) + + # Iterate over all molecules to find the covalent ones + covalent_molecules = [] + for target_molecule in target_molecules: + if rdkit.get_residue_id(target_molecule) == rdkit.get_residue_id(molecule): + continue + + # Extract coordinates of the molecule + target_molecule_coordinates = rdkit.get_coordinates(target_molecule) + + # Check if the molecule is covalently bonded to the selected residue + distances = np.linalg.norm(molecule_coordinates[:, np.newaxis, :] - target_molecule_coordinates[np.newaxis, :, :], axis=2) + lowest_distance = np.min(distances) + + if lowest_distance <= 1.5: + covalent_molecules.append(target_molecule) + + return covalent_molecules @property def is_complex(self): diff --git a/peleffy/utils/toolkits.py b/peleffy/utils/toolkits.py index fd662f8e..934caa4b 100644 --- a/peleffy/utils/toolkits.py +++ b/peleffy/utils/toolkits.py @@ -245,6 +245,60 @@ def set_conformer(self, molecule, conformer): # Add current conformer rdkit_molecule.AddConformer(conformer, assignId=True) + + def get_chain_id(self, molecule): + """ + It returns the chain id of the molecule according to the RDKit + molecule object. + + Parameters + ---------- + molecule : an peleffy.topology.Molecule + The peleffy's Molecule object + + Returns + ------- + chain_id : str + The chain id of the molecule + """ + rdkit_molecule = molecule.rdkit_molecule + + first_atom = list(rdkit_molecule.GetAtoms())[0] + + # Catch a None return + try: + chain_id = first_atom.GetPDBResidueInfo().GetChainId() + except AttributeError: + chain_id = None + + return chain_id + + def get_residue_id(self, molecule): + """ + It returns the residue id of the molecule according to the RDKit + molecule object. + + Parameters + ---------- + molecule : an peleffy.topology.Molecule + The peleffy's Molecule object + + Returns + ------- + residue_id : int + The residue id of the molecule + """ + rdkit_molecule = molecule.rdkit_molecule + + first_atom = list(rdkit_molecule.GetAtoms())[0] + + # Catch a None return + try: + residue_id = first_atom.GetPDBResidueInfo().GetResidueNumber() + except AttributeError: + residue_id = None + + return residue_id def get_residue_name(self, molecule): """ @@ -912,6 +966,97 @@ def align_molecules(self, mol1, mol2, atom_mapping=None): return mol2 + def reorder_atoms(self, molecule, new_order): + """ + Reorder atoms in an RDKit molecule. + + Parameters + ---------- + molecule : rdkit.Chem.rdchem.Mol + The RDKit molecule to reorder. + new_order : list[int] + The new order of atom indices. + + Returns + ------- + reordered_molecule : rdkit.Chem.rdchem.Mol + The RDKit molecule with reordered atoms. + """ + from rdkit import Chem + + reordered_molecule = Chem.RenumberAtoms(molecule, new_order) + return reordered_molecule + + def combine_molecules(self, mol1, mol2): + """ + Combine two RDKit molecules into a single molecule. + + Parameters + ---------- + mol1 : rdkit.Chem.rdchem.Mol + The first RDKit molecule. + mol2 : rdkit.Chem.rdchem.Mol + The second RDKit molecule. + + Returns + ------- + combined_mol : rdkit.Chem.rdchem.Mol + The combined RDKit molecule. + """ + from rdkit import Chem + + # Generate PDB block for mol1 + pdb_block1 = Chem.MolToPDBBlock(mol1) + + # Generate PDB block for mol2 + pdb_block2 = Chem.MolToPDBBlock(mol2) + + # Reindex atom indices in mol2 + reindexed_pdb_block2 = "" + reindex_map = dict() + mol2_offset = len(mol1.GetAtoms()) + for pdb_line in pdb_block2.split('\n'): + if pdb_line.startswith('ATOM') or pdb_line.startswith('HETATM'): + atom_idx = int(pdb_line[6:11]) + new_atom_idx = str(atom_idx + mol2_offset).rjust(5) + + reindexed_pdb_block2 += f"{pdb_line[:6]}{new_atom_idx}{pdb_line[11:]}\n" + reindex_map[atom_idx] = atom_idx + mol2_offset + + # Reindex CONECT lines in mol2 + for pdb_line in pdb_block2.split('\n'): + if pdb_line.startswith('CONECT'): + stripped_line = pdb_line.replace("CONECT", "") + ids_in_line = [stripped_line[i:i + 5] for i in + range(0, len(stripped_line), 5)] + + new_ids_in_line = [str(reindex_map[int(atom_id)]).rjust(5) + for atom_id in ids_in_line] + reindexed_pdb_block2 += 'CONECT' + ''.join(new_ids_in_line) + '\n' + + # Combine PDB blocks + pdb_block = pdb_block1 + reindexed_pdb_block2 + + # Remove END line + pdb_block = pdb_block.replace('END\n', '') + + # Move CONECT lines to the end + pdb_block = '\n'.join([line for line in pdb_block.split('\n') + if not line.startswith('CONECT')]) + '\n' + \ + '\n'.join([line for line in pdb_block.split('\n') + if line.startswith('CONECT')]) + '\nEND\n' + + # Remove empty lines + pdb_block = '\n'.join([line for line in pdb_block.split('\n') + if line.strip() != '']) + + # Generate combined molecule + combined_mol = Chem.MolFromPDBBlock(pdb_block, + removeHs=False) + + return combined_mol + + def alchemical_combination(self, mol1, mol2, atom_mapping, connections): """ diff --git a/setup.py b/setup.py index 5948259d..ae080f35 100644 --- a/setup.py +++ b/setup.py @@ -48,4 +48,9 @@ def find_package_data(data_root, package_root): cmdclass=versioneer.get_cmdclass(), package_data={'peleffy': find_package_data( 'peleffy/data', 'peleffy')}, + entry_points={ + 'console_scripts': [ + 'peleffy=peleffy.main:main', # Ensure this points to the correct main function + ], + }, )