Skip to content

Commit

Permalink
delayed imports reduce dependencies
Browse files Browse the repository at this point in the history
  • Loading branch information
stefdoerr committed Dec 7, 2023
1 parent f56483f commit 92f55a1
Show file tree
Hide file tree
Showing 5 changed files with 41 additions and 20 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/test_and_publish.yml
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@ jobs:
run: |
mamba install -y -q --file extra_requirements.txt python=${{ matrix.python-version }} -c acellera -c conda-forge
mamba install -y -q rdkit=2023.03.3 -c conda-forge
# scikit-learn, joblib, biopython are only used for rarely-used functions in smallmol
mamba install -y -q scikit-learn joblib biopython -c conda-forge
- name: Install moleculekit
run: |
Expand Down
6 changes: 1 addition & 5 deletions extra_requirements.txt
Original file line number Diff line number Diff line change
@@ -1,11 +1,7 @@
tqdm
ipython
matplotlib-base
joblib
scikit-learn
rdkit>=2020.09.5
mdtraj
pdb2pqr>=3.5.2+18
propka
openbabel>=3.1.1
biopython
openbabel>=3.1.1
27 changes: 20 additions & 7 deletions moleculekit/smallmol/tools/clustering.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
import logging
from moleculekit.smallmol.util import convertToString
from tqdm import tqdm
from joblib import Parallel, delayed # Import delayed as well for other modules


logger = logging.getLogger(__name__)
Expand All @@ -19,6 +18,7 @@ def ParallelExecutor(**joblib_args):
"""
A wrapper for joblib.Parallel to allow custom progress bars.
"""
from joblib import Parallel

def aprun(**tq_args):
tqdm_f = lambda x, args: tqdm(x, **args)
Expand Down Expand Up @@ -91,7 +91,7 @@ def cluster(
smallmol_list, method, distThresholds=0.2, returnDetails=True, removeHs=True
):
"""
Rreturn the SmallMol objects grouped in the cluster. It can also return the details of the clusters computed.
Return the SmallMol objects grouped in the cluster. It can also return the details of the clusters computed.
Parameters
----------
Expand All @@ -117,8 +117,18 @@ def cluster(
details: list
A list with all the cluster details
"""

from sklearn.cluster import DBSCAN
try:
from sklearn.cluster import DBSCAN
except ImportError:
raise ImportError(
"Please install scikit-learn to use the clustering methods. You can install it with `conda install scikit-learn`"
)
try:
from joblib import delayed
except ImportError:
raise ImportError(
"Please install joblib to use the clustering methods. You can install it with `conda install joblib`"
)

import sys

Expand All @@ -136,9 +146,7 @@ def cluster(

if method not in _methods:
raise ValueError(
"The method provided {} does not exists. The ones available are the following: {}".format(
method, _methods
)
f"The method provided '{method}' does not exist. The ones available are the following: {_methods}"
)

smallmol_list = np.array([sm.copy() for sm in smallmol_list])
Expand Down Expand Up @@ -202,6 +210,7 @@ def _maccsClustering(rdkit_mols):
The numpy array containing the tanimoto matrix
"""
from rdkit.Chem import MACCSkeys # computes MACCS keys
from joblib import delayed

fps = []
for m in tqdm(rdkit_mols):
Expand Down Expand Up @@ -230,6 +239,7 @@ def _pathFingerprintsClustering(rdkit_mols):
The numpy array containing the tanimoto matrix
"""
from rdkit.Chem.Fingerprints import FingerprintMols # computes path fingerprints
from joblib import delayed

fps = []
for m in tqdm(rdkit_mols):
Expand Down Expand Up @@ -258,6 +268,7 @@ def _atomsFingerprintsClustering(rdkit_mols):
The numpy array containing the dice matrix
"""
from rdkit.Chem.AtomPairs import Pairs # Atom pairs
from joblib import delayed

fps = []
for m in tqdm(rdkit_mols):
Expand Down Expand Up @@ -286,6 +297,7 @@ def _torsionsFingerprintsClustering(rdkit_mols):
The numpy array containing the dice matrix
"""
from rdkit.Chem.AtomPairs import Torsions # Topological Torsions
from joblib import delayed

fps = []
for m in tqdm(rdkit_mols):
Expand Down Expand Up @@ -318,6 +330,7 @@ def _circularFingerprintsClustering(rdkit_mols, radius=2):
The numpy array containing the dice matrix
"""
from rdkit.Chem import AllChem # computes circular fingerprints
from joblib import delayed

fps = []
for m in rdkit_mols:
Expand Down
14 changes: 8 additions & 6 deletions moleculekit/smallmol/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -212,7 +212,6 @@ def getChemblSimilarLigandsBySmile(smi, threshold=85, returnSmiles=False):


def convertToString(arr):

if isinstance(arr, list):
arr_str = " ".join([str(i) for i in arr])
elif isinstance(arr, tuple):
Expand Down Expand Up @@ -256,7 +255,6 @@ def _depictMol(
from os.path import splitext
from rdkit.Chem import Kekulize
from rdkit.Chem.Draw import rdMolDraw2D
from IPython.display import SVG

if highlightAtoms is not None and not isinstance(highlightAtoms, list):
raise ValueError(
Expand Down Expand Up @@ -319,6 +317,8 @@ def _depictMol(

# activate Jupyter notebook rendering
if ipython:
from IPython.display import SVG

svg = svg.replace("svg:", "")
return SVG(svg)
else:
Expand Down Expand Up @@ -358,7 +358,6 @@ def depictMultipleMols(
"""
from rdkit.Chem.Draw import MolsToGridImage
from IPython.display import SVG
from os.path import splitext

sel_atoms = []
Expand All @@ -382,9 +381,10 @@ def depictMultipleMols(

from rdkit.Chem.Draw import IPythonConsole as CDIPythonConsole

if MolsToGridImage == CDIPythonConsole.ShowMols:
CDIPythonConsole.UninstallIPythonRenderer()
from rdkit.Chem.Draw import MolsToGridImage
if ipython:
if MolsToGridImage == CDIPythonConsole.ShowMols:
CDIPythonConsole.UninstallIPythonRenderer()
from rdkit.Chem.Draw import MolsToGridImage

svg = MolsToGridImage(
mols_list,
Expand All @@ -404,6 +404,8 @@ def depictMultipleMols(
f.close()

if ipython:
from IPython.display import SVG

_svg = SVG(svg)
return _svg
else:
Expand Down
12 changes: 10 additions & 2 deletions moleculekit/tools/atomtyper.py
Original file line number Diff line number Diff line change
Expand Up @@ -453,7 +453,13 @@ def getFeatures(mol):

def parallel(func, listobj, n_cpus=-1, *args):
from tqdm import tqdm
from joblib import Parallel, delayed

try:
from joblib import Parallel, delayed
except ImportError:
raise ImportError(
"Please install joblib to use the parallel function with `conda install joblib`."
)

results = Parallel(n_jobs=n_cpus)(delayed(func)(ob, *args) for ob in tqdm(listobj))
return results
Expand All @@ -472,7 +478,9 @@ def test_preparation(self):

assert mol_equal(mol2, ref, exceptFields=("coords",))

@unittest.skipIf(sys.platform.startswith("win"), "Windows OBabel fails at atom typing")
@unittest.skipIf(
sys.platform.startswith("win"), "Windows OBabel fails at atom typing"
)
def test_obabel_atomtyping(self):
from moleculekit.home import home
from moleculekit.molecule import Molecule
Expand Down

0 comments on commit 92f55a1

Please sign in to comment.