Skip to content

Commit

Permalink
minor improvements
Browse files Browse the repository at this point in the history
  • Loading branch information
yazdanimehdi authored and yazdanimehdi committed Oct 22, 2024
1 parent b9930ad commit b03e668
Show file tree
Hide file tree
Showing 14 changed files with 192,386 additions and 110 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -179,4 +179,5 @@ drug_b.py
**/._.DS_Store
**/**/pdb/
*.pdb
.VSCodeCounter/
.VSCodeCounter/
*.pth
191,809 changes: 191,809 additions & 0 deletions data/drugbank_ddi/drugbank_DDI.tab

Large diffs are not rendered by default.

6 changes: 6 additions & 0 deletions davis_correct.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
import pandas as pd
import numpy as np
# One-off correction script: rewrites the "Y" column of the Davis file in place.
# NOTE(review): the file is read from and written back to the same path, so
# running this script a second time applies the transform again.
DAVIS_PATH = 'data/davis/davis.txt'


def _neg_log10_scaled(value):
    """Return -log10(value / 1e9).

    Presumably converts an affinity given in nM to a pKd-style value —
    TODO confirm the unit of the raw "Y" column.
    """
    return -np.log10(value / 1e9)


# Load the comma-separated table.
davis_frame = pd.read_csv(DAVIS_PATH, sep=',')
# Transform every entry of the "Y" column element by element.
davis_frame["Y"] = davis_frame["Y"].apply(_neg_log10_scaled)
# Write the table back without the DataFrame index column.
davis_frame.to_csv(DAVIS_PATH, index=False)
121 changes: 121 additions & 0 deletions deepdrugdomain/configs/fx_ddi.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
{
"model": {
"default": {
"protein_graph_conv_layer": [
"dgl_tag",
"dgl_tag",
"dgl_gat"
],
"ligand_graph_conv_layer": [
"dgl_tag",
"dgl_tag",
"dgl_gat"
],
"protein_input_size": 74,
"ligand_input_size": 74,
"protein_graph_conv_dims": [
74,
128
],
"ligand_graph_conv_dims": [
74,
128
],
"protein_conv_dropout_rate": [
0.05,
0.05,
0.05
],
"protein_conv_normalization": [
false,
false,
false
],
"ligand_conv_dropout_rate": [
0.05,
0.05,
0.05
],
"ligand_conv_normalization": [
false,
false,
false
],
"head_dropout_rate": 0.05,
"head_activation_fn": [
null
],
"head_normalization": [
"layer_norm"
],
"protein_graph_conv_kwargs": [
{
"k": 4
},
{
"k": 4
},
{
"num_heads": 2
}
],
"ligand_graph_conv_kwargs": [
{
"k": 8
},
{
"k": 8
},
{
"num_heads": 2
}
],
"ligand_graph_pooling_kwargs": {},
"protein_graph_pooling_kwargs": {},
"embedding_dim": 256,
"ligand_graph_pooling": null,
"protein_graph_pooling": null,
"self_attention_depth": 4,
"self_attention_num_heads": 4,
"self_attention_mlp_ratio": 4,
"self_attention_qkv_bias": true,
"self_attention_qk_scale": null,
"self_attention_drop_rate": 0.4,
"self_attn_drop_rate": 0.0,
"self_drop_path_rate": 0.4,
"self_norm_layer": "layer_norm",
"input_norm_layer": "layer_norm",
"output_norm_layer": "layer_norm",
"block_layers": "transformer_attention_block",
"input_block_layers": "transformer_cross_attention_block",
"output_block_layers": "transformer_cross_attention_block",
"self_act_layer": "gelu",
"input_act_layer": "gelu",
"output_act_layer": "gelu",
"attention_block": "transformer_attention",
"self_mlp_block": "transformer_mlp",
"input_mlp_block": "transformer_mlp",
"output_mlp_block": "transformer_mlp",
"input_cross_att_block": "transformer_cross_attention",
"output_cross_att_block": "transformer_cross_attention",
"input_cross_attention_num_heads": 4,
"input_cross_attention_mlp_ratio": 4,
"input_cross_attention_qkv_bias": true,
"input_cross_attention_qk_scale": null,
"input_cross_attention_drop_rate": 0.1,
"input_cross_attn_drop_rate": 0.0,
"input_cross_drop_path_rate": 0.1,
"output_cross_attention_num_heads": 4,
"output_cross_attention_mlp_ratio": 4,
"output_cross_attention_qkv_bias": true,
"output_cross_attention_qk_scale": null,
"output_cross_attention_drop_rate": 0.4,
"output_cross_attn_drop_rate": 0.0,
"output_cross_drop_path_rate": 0.4,
"input_stages": 3,
"output_stages": 3,
"latent_space": 300,
"head_dims": []
}
}
}
1 change: 1 addition & 0 deletions deepdrugdomain/data/datasets/DDI_datasets/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from .drugbank import DrugBankDDIDataset
65 changes: 65 additions & 0 deletions deepdrugdomain/data/datasets/DDI_datasets/drugbank.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
import os
from typing import Dict, List, Optional, Tuple, Union
from deepdrugdomain.data.utils import CustomDataset
from deepdrugdomain.data.preprocessing.utils.preprocessing_data_struct import PreprocessingObject
from ..factory import DatasetFactory


@DatasetFactory.register('drugbank_ddi')
class DrugBankDDIDataset(CustomDataset):
    """
    Dataset class for DrugBank drug-drug interaction (DDI) data.

    This class extends CustomDataset and points it at a single tab-separated
    file, ``drugbank_DDI.tab``, located inside ``file_paths``. It is registered
    in ``DatasetFactory`` under the key ``'drugbank_ddi'``.

    Parameters:
        file_paths (str): Directory that contains (or will receive)
            ``drugbank_DDI.tab``.
        preprocesses (PreprocessingObject): Preprocessing configuration(s),
            forwarded unchanged to ``CustomDataset``.
        save_directory (Optional[str]): Directory to save processed files;
            defaults to ``file_paths`` if None.
        urls (Optional[Union[List[str], str]]): URL(s) of the dataset file,
            forwarded to ``CustomDataset``.
        common_columns (Optional[Union[Dict[str, str], List[Dict[str, str]]]]):
            Column-name mapping(s), forwarded to ``CustomDataset``.
        separators (Union[List[str], str], optional): Separator(s) used in the
            data file; defaults to a tab.
        associated_model (Optional[str]): The name of the model associated with
            the dataset, if any.
        threads (int, optional): Thread count, forwarded to ``CustomDataset``.

    Example:
        >>> # `preprocesses` is a PreprocessingObject built elsewhere.
        >>> dataset = DrugBankDDIDataset(
        ...     file_paths='data/drugbank_ddi/',
        ...     preprocesses=preprocesses,
        ... )

    Note:
        If ``drugbank_DDI.tab`` does not exist in ``file_paths`` after the base
        class has been initialised, ``self.download()`` is called. That method
        is inherited and not shown here; presumably it fetches ``urls``.
    """

    def __init__(self, file_paths: str,
                 preprocesses: PreprocessingObject,
                 save_directory: str | None = None,
                 # Raw-file URL: the `/blob/` form serves GitHub's HTML viewer
                 # page instead of the tab-separated data.
                 # NOTE(review): confirm this repository is the intended host.
                 urls: List[str] | str | None = [
                     'https://github.com/khodabandeh-ali/D3-NewTasks/raw/main/data/drugbank/drugbank_DDI.tab'],
                 common_columns: Dict[str,
                                      str] | List[Dict[str, str]] | None = {},
                 separators: List[str] | str = ['\t'],
                 associated_model: str | None = None,
                 threads: int = 4) -> None:

        self.file_paths = file_paths
        drugbank_data_path = os.path.join(self.file_paths, 'drugbank_DDI.tab')

        # The defaults above are mutable objects shared by every call. Pass
        # shallow copies on so that nothing downstream can alter the defaults
        # (or a caller's own containers) for later instances.
        if isinstance(urls, list):
            urls = list(urls)
        if isinstance(common_columns, dict):
            common_columns = dict(common_columns)
        elif isinstance(common_columns, list):
            common_columns = list(common_columns)
        if isinstance(separators, list):
            separators = list(separators)

        # The base class receives a list of file paths; this dataset has one.
        file_paths = [drugbank_data_path]
        save_directory = self.file_paths if save_directory is None else save_directory
        # NOTE(review): the eighth positional argument is passed as None; its
        # meaning is defined by CustomDataset.__init__, which is not shown here.
        super().__init__(file_paths, preprocesses, save_directory, urls,
                         common_columns, separators, associated_model, None, threads)

        # Fetch the data file only when it is not already on disk.
        if not os.path.exists(drugbank_data_path):
            self.download()
1 change: 1 addition & 0 deletions deepdrugdomain/data/datasets/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from .DTI_datasets import *
from .DTA_datasets import *
from .factory import DatasetFactory
from .DDI_datasets import *
12 changes: 9 additions & 3 deletions deepdrugdomain/data/preprocessing/drug/smile_to_dgl_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ def preprocess(self, data: str) -> Optional[dgl.DGLGraph]:
return None
smile_graphs = [smiles_to_bigraph(
f, add_self_loop=True, node_featurizer=self.node_featurizer, edge_featurizer=self.edge_featurizer) for f in frags]
constructed_graphs = dgl.batch(smile_graphs)
constructed_graphs = smile_graphs

except Exception as e:
constructed_graphs = None
Expand All @@ -109,7 +109,13 @@ def preprocess(self, data: str) -> Optional[dgl.DGLGraph]:
return constructed_graphs

def save_data(self, data: dgl.DGLGraph, path: str) -> None:
dgl.save_graphs(path, [data])
if not isinstance(data, dgl.DGLGraph):
super().save_data(data, path)
else:
dgl.save_graphs(path, [data])

def load_data(self, path: str) -> dgl.DGLGraph:
return dgl.load_graphs(path)[0][0]
if self.fragment:
return super().load_data(path)
else:
return dgl.load_graphs(path)[0][0]
2 changes: 1 addition & 1 deletion deepdrugdomain/layers/graph_layers/dgl_layers.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ def forward(self, g: dgl.DGLGraph) -> dgl.DGLGraph:
features = self.dropout(features)

new_g = g
new_g.ndata['h'] = features
new_g.ndata['h'] = torch.mean(features, dim=1)

return new_g

Expand Down
1 change: 1 addition & 0 deletions deepdrugdomain/models/DDI/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from .fx_ddi import FragXSiteDDI
Loading

0 comments on commit b03e668

Please sign in to comment.