Skip to content

Commit

Permalink
clustering tab working
Browse files Browse the repository at this point in the history
  • Loading branch information
kamurani committed Aug 2, 2022
1 parent ef2f90a commit 4018430
Show file tree
Hide file tree
Showing 9 changed files with 2,458 additions and 61 deletions.
2,115 changes: 2,115 additions & 0 deletions embeddings/ALL_CURRENT.csv

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion pomegranate/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import dash
from dash import Dash, dcc, html, Input, Output
from tabs.seeMotif import motifVisualisationTab
from tabs.clustering import clustering_tab
from tabs.clustering import clustering_tab, update_graph, update_vis_1

PROTEIN_ID = "default"

Expand Down
11 changes: 11 additions & 0 deletions pomegranate/definitions.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,19 @@

SAVED_GRAPHS_DIR = os.path.join(ROOT_DIR, "graphs")

SAVED_CLUSTER_GRAPHS_PATH = os.path.join(ROOT_DIR, "clustering_saved_graphs")

STRUCTURE_HUMAN_PATH = os.path.join(STRUCTURE_PATH, 'human')
STRUCTURE_YEAST_PATH = os.path.join(STRUCTURE_PATH, 'yeast')



# Name of the embeddings CSV file inside the `embeddings` directory.
# Earlier values ("embeddings_output.csv" and f"EM_gcn_E{n}.csv") were
# immediately overwritten and never took effect, so only the effective
# assignment is kept.
n = 61  # kept so the module namespace is unchanged; only the discarded f"EM_gcn_E{n}.csv" name used it
EMBEDDINGS_FILENAME = "ALL_CURRENT.csv"
# Full path: <ROOT_DIR>/embeddings/<EMBEDDINGS_FILENAME>
EMBEDDINGS_PATH = os.path.join(ROOT_DIR, 'embeddings', EMBEDDINGS_FILENAME)



152 changes: 152 additions & 0 deletions pomegranate/interactive.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"To use the Graphein submodule graphein.protein.visualisation, you need to install: pytorch3d \n",
"To do so, use the following command: conda install -c pytorch3d pytorch3d\n"
]
},
{
"data": {
"text/html": [
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"color: #7fbfbf; text-decoration-color: #7fbfbf\">[08/02/22 12:11:29] </span><span style=\"color: #800000; text-decoration-color: #800000\">WARNING </span> To use the Graphein submodule <a href=\"file:///home/cam/mambaforge/lib/python3.9/site-packages/graphein/protein/meshes.py\"><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">meshes.py</span></a><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">:</span><a href=\"file:///home/cam/mambaforge/lib/python3.9/site-packages/graphein/protein/meshes.py#29\"><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">29</span></a>\n",
" graphein.protein.meshes, you need to install: \n",
" pytorch3d \n",
" To do so, use the following command: conda install \n",
" -c pytorch3d pytorch3d \n",
"</pre>\n"
],
"text/plain": [
"\u001b[2;36m[08/02/22 12:11:29]\u001b[0m\u001b[2;36m \u001b[0m\u001b[31mWARNING \u001b[0m To use the Graphein submodule \u001b]8;id=645620;file:///home/cam/mambaforge/lib/python3.9/site-packages/graphein/protein/meshes.py\u001b\\\u001b[2mmeshes.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=336006;file:///home/cam/mambaforge/lib/python3.9/site-packages/graphein/protein/meshes.py#29\u001b\\\u001b[2m29\u001b[0m\u001b]8;;\u001b\\\n",
" graphein.protein.meshes, you need to install: \n",
" pytorch3d \n",
" To do so, use the following command: conda install \n",
" -c pytorch3d pytorch3d \n"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"from graphein.protein.config import ProteinGraphConfig\n",
"\n",
"config = ProteinGraphConfig()"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "9889db684b504c10b7e76c5a34eab4e3",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Output()"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"></pre>\n"
],
"text/plain": []
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">\n",
"</pre>\n"
],
"text/plain": [
"\n"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"from graphein.protein.graphs import construct_graph\n",
"\n",
"g = construct_graph(config=config, pdb_code=\"3eiy\")"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"ename": "KeyError",
"evalue": "'rsa'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)",
"\u001b[1;32m/mnt/home2nd/pomegranate/pomegranate/interactive.ipynb Cell 3\u001b[0m in \u001b[0;36m<cell line: 9>\u001b[0;34m()\u001b[0m\n\u001b[1;32m <a href='vscode-notebook-cell:/mnt/home2nd/pomegranate/pomegranate/interactive.ipynb#ch0000002?line=7'>8</a>\u001b[0m avg_rsa \u001b[39m=\u001b[39m \u001b[39m0\u001b[39m\n\u001b[1;32m <a href='vscode-notebook-cell:/mnt/home2nd/pomegranate/pomegranate/interactive.ipynb#ch0000002?line=8'>9</a>\u001b[0m \u001b[39mfor\u001b[39;00m k, n \u001b[39min\u001b[39;00m g\u001b[39m.\u001b[39mnodes(data\u001b[39m=\u001b[39m\u001b[39mTrue\u001b[39;00m):\n\u001b[0;32m---> <a href='vscode-notebook-cell:/mnt/home2nd/pomegranate/pomegranate/interactive.ipynb#ch0000002?line=10'>11</a>\u001b[0m \u001b[39mprint\u001b[39m(n[\u001b[39m'\u001b[39;49m\u001b[39mrsa\u001b[39;49m\u001b[39m'\u001b[39;49m])\n",
"\u001b[0;31mKeyError\u001b[0m: 'rsa'"
]
}
],
"source": [
"from graphein.protein.subgraphs import extract_subgraph_from_point\n",
"#from protein.phosphosite import get_protein_subgraph_radius\n",
"\n",
"site = list(g.nodes())[10]\n",
"\n",
"#g = get_protein_subgraph_radius(g=g, site=site, r=r)\n",
"\n",
"avg_rsa = 0\n",
"for k, n in g.nodes(data=True):\n",
" \n",
" print(n['meiler'])\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3.9.7 ('base')",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.13"
},
"orig_nbformat": 4,
"vscode": {
"interpreter": {
"hash": "dba50d74266ba8c3f81eac460edffb71d253875af3f2e47df42dacc524c6cffc"
}
}
},
"nbformat": 4,
"nbformat_minor": 2
}
12 changes: 8 additions & 4 deletions pomegranate/json_conversion/graphein_to_json.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import os
from typing import Dict
from typing import Dict, Union
import graphein.protein as gp

import networkx as nx
Expand Down Expand Up @@ -84,18 +84,22 @@ def g_to_json(g, prot_id, db_name='PDB', save_path=SAVED_GRAPHS_DIR):
Get graph back from json
'''
def load_prot_graph (
json_graph: Dict,
json_graph: Union[Dict, str],
) -> nx.Graph:

"""
:param json_graph: JSON object that represents a NetworkX graph. Can be loaded in from a JSON file.
:param json_graph: JSON string or JSON object that represents a NetworkX graph. Can be loaded in from a JSON file.
:type json_graph: Union[Dict, str]
:return: NetworkX protein graph
:rtype: nx.Graph
"""

# Load general graph
g: nx.Graph = nx.readwrite.json_graph.node_link_graph(json.loads(json_graph))

if type(json_graph) == str:
json_graph = json.loads(json_graph)

g: nx.Graph = nx.readwrite.json_graph.node_link_graph(json_graph)

# Convert specific fields from strings
g.graph["pdb_df"] = pd.read_json(g.graph["pdb_df"])
Expand Down
34 changes: 31 additions & 3 deletions pomegranate/load.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,25 @@ class GraphDict:
pass


"""
Get average RSA for a graph (or subgraph)
"""
def get_avg_rsa(
    g: nx.Graph,
) -> float:
    """
    Compute the mean of the ``'rsa'`` node attribute over all nodes of ``g``.

    RSA is presumably the relative solvent accessibility of each residue
    (TODO confirm); this function only requires that every node carries an
    ``'rsa'`` attribute that ``float()`` can convert.

    :param g: Graph whose nodes each have an ``'rsa'`` attribute.
    :type g: nx.Graph
    :return: Arithmetic mean of the per-node ``'rsa'`` values, or ``0.0`` if
        the graph has no nodes.
    :rtype: float
    :raises KeyError: If a node has no ``'rsa'`` attribute.
    """
    # Single pass over the nodes; avoids shadowing the builtin ``sum`` and
    # building a throwaway node list just to count it.
    rsa_values = [float(attrs['rsa']) for _, attrs in g.nodes(data=True)]

    # An empty (sub)graph has no defined mean; return 0.0 rather than
    # failing with a bare ZeroDivisionError.
    if not rsa_values:
        return 0.0

    return sum(rsa_values) / len(rsa_values)






"""
String printing functions
"""
Expand Down Expand Up @@ -225,7 +244,7 @@ def load_graphs(
if verbose:
print(f"[{index:4d}] Constructing graph from {acc}...", end=" ")

try:
if True:
g = construct_graph(config, pdb_path=pdb_path)

pos: int = int(res_pos)
Expand All @@ -238,6 +257,8 @@ def load_graphs(

g = get_surface_motif(g, site=res, r=radius_threshold, asa_threshold=rsa_threshold)

avg_rsa = get_avg_rsa(g)

# Assert that phosphosite residue is same as what we expected
assert aa3to1(psite_res) == res_code, f"Residue mismatch {psite_res} and {res_code}"
assert aa3to1(res.split(':')[1]) == res_code, f"Residue mismatch {res} and {res_code} {pos}"
Expand All @@ -249,8 +270,15 @@ def load_graphs(
# Assert that phosphosite is included in the graph.
# TODO: display green on the terminal output if it is included;
# Display red on terminal if it is excluded (and --force was used.)
try:

graph = {'graph': g, 'kinase': kinase, 'psite': psite, 'res': res}
graph = {
'graph': g,
'kinase': kinase,
'psite': psite,
'res': res,
'average_rsa': avg_rsa,
}
graphs[index] = graph

psite_contained = res in list(g.nodes())
Expand Down Expand Up @@ -293,7 +321,7 @@ def load_graphs(
print(f"{stats['num_fail']} graph constructions failed")
print("")

return graphs
return graphs


'''
Expand Down
Empty file.
Loading

0 comments on commit 4018430

Please sign in to comment.