Skip to content

Commit

Permalink
changed definitions. attempt to debug AF search. Has something to do …
Browse files Browse the repository at this point in the history
…with configs
  • Loading branch information
z5310969 committed Aug 1, 2022
2 parents 5c426cb + 2ac3ce6 commit e9aba60
Show file tree
Hide file tree
Showing 21 changed files with 47,284 additions and 378 deletions.
4 changes: 2 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
### POMEGRANATE ###
structures/

saved_graphs/*
saved_graphs/
graphs/


# Byte-compiled / optimized / DLL files
Expand Down
1,415 changes: 1,415 additions & 0 deletions examples/pdbs/134l.pdb

Large diffs are not rendered by default.

4,017 changes: 4,017 additions & 0 deletions examples/pdbs/16gs.pdb

Large diffs are not rendered by default.

2,147 changes: 2,147 additions & 0 deletions examples/pdbs/3eiy.pdb

Large diffs are not rendered by default.

6,832 changes: 6,832 additions & 0 deletions examples/pdbs/Q5VSL9.pdb

Large diffs are not rendered by default.

9,395 changes: 9,395 additions & 0 deletions examples/pdbs/Q8W3K0.pdb

Large diffs are not rendered by default.

6,832 changes: 6,832 additions & 0 deletions examples/pdbsQ5VSL9.pdb

Large diffs are not rendered by default.

1 change: 0 additions & 1 deletion graphs/10gs_PDB.json

This file was deleted.

2 changes: 1 addition & 1 deletion graphs/121p_PDB.json

Large diffs are not rendered by default.

1 change: 0 additions & 1 deletion graphs/4hhb_PDB.json

This file was deleted.

62 changes: 42 additions & 20 deletions pomegranate/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,30 +6,52 @@
from distutils.log import debug
import dash
from dash import Dash, dcc, html, Input, Output
from tabs.seeMotif import motifVisualisationTab, sidebarTab
from tabs.seeMotif import motifVisualisationTab

# Module-level protein identifier, initialised to the literal "default".
# NOTE(review): not referenced anywhere in the visible part of this file -- confirm it is still needed.
PROTEIN_ID = "default"

# External CSS file handed to Dash when the app is constructed below.
external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']

# The Dash application object; the layout and callbacks below are attached to it.
app = Dash(__name__, external_stylesheets=external_stylesheets)

# sidebar = html.Div(
# id="sidebar",
# children= [
# html.H3("Find a protein"),
# html.Hr(),
# html.P(
# "A placeholder for searching a protein of interest"
# ),
# html.Button("Button")
# ]
# )
'''
Sidebar
'''
# Database name -> list of option strings.
# NOTE(review): the option values look like placeholders -- confirm before relying on them.
fnameDict = {
    'PDB': ['opt1_p', 'opt2_p', 'opt3_p'],
    'SWISS_PROT': ['opt1_s', 'opt2_s'],
    'AlphaFold': ['opt1_a'],
}

# Database names in insertion order; the first one is the dropdown default.
names = list(fnameDict)
# Options belonging to the default (first) database.
nestedOptions = fnameDict[names[0]]

# Sidebar: database selector, protein ID text box, two session-scoped stores
# and a div used to echo the inputs while debugging.
sidebar = html.Div(
    id="sidebar-container",
    children=[
        html.H3("Find a protein"),
        html.Hr(),
        dcc.Dropdown(
            id='db-dropdown',
            options=[{'label': db_name, 'value': db_name} for db_name in names],
            value=names[0],
            style={'width': '80%'},
        ),
        dcc.Input(
            id="prot-input",
            type="text",
            # value="4hhb",  # TODO: REMOVE THIS AFTER TESTING
            placeholder="Protein ID",
            style={'width': '80%'},
            debounce=True,
            persistence=True,
            persistence_type='session',
        ),
        dcc.Store(id='intermediate-value-prot', storage_type='session'),
        dcc.Store(id='intermediate-value-psites', storage_type='session'),
        html.Div(id='input-show'),  # DEBUGGING inputs
    ],
)

'''
Tabs
'''
tab_selected_style = {
'borderTop': '3px solid #b52d37',
}
Expand All @@ -46,13 +68,20 @@
]
)

# Page skeleton shared by the live layout and the validation layout:
# logo, title, then the grid holding the sidebar and the tab content.
# (A stale, never-closed ``app.layout = html.Div([`` line used to precede
# this assignment and made the module unparseable; the layout is assigned
# once, below.)
base_page = html.Div([
    html.Img(
        src=app.get_asset_url('imgs/POMEGRANATE-LOGO.png'),
        style={'width': '40%'},
    ),
    html.H2('PhOsphosite Motif Explorer -- GRAph Network Abstraction Through Embeddings'),
    html.Div(id="content-grid", children=[sidebar, content]),
])

app.layout = base_page

# Validation layout = the base page plus the motif visualisation tab.
# NOTE(review): presumably the tab is included so callbacks targeting its
# components validate even though it is not in the initial layout -- confirm.
app.validation_layout = html.Div([
    base_page,
    motifVisualisationTab(),
])

@app.callback(Output('tab-container', 'children'),
Input('tab-options', 'value'))
def render_content(tab):
Expand All @@ -76,13 +105,6 @@ def render_content(tab):
elif tab == 'documentation':
return html.H3('Documentation')

'''
NOT SURE WHAT TO DO FOR INPUT
'''
@app.callback(
    Output('sidebar-container', 'children'),
    Input('tab-options', 'value'),
)
def render_content(tab):
    """Return the sidebar children whenever the selected tab changes.

    ``tab`` only triggers the callback; the value itself is not inspected and
    the result of ``sidebarTab()`` is returned for every tab.
    """
    sidebar_children = sidebarTab()
    return sidebar_children



Expand Down
7 changes: 3 additions & 4 deletions pomegranate/definitions.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,14 @@
# Pomegranate root
# ``source_dir`` is the directory containing this file; ROOT_DIR is its parent.
source_dir = os.path.dirname(os.path.abspath(__file__))
ROOT_DIR = os.path.dirname(source_dir)

# Directory holding structure files.
# NOTE(review): the unresolved merge conflict that used to sit here also
# carried ``os.path.join(ROOT_DIR, 'structures')``. The value below was the
# later assignment (the one that would take effect) -- confirm it is the
# intended location.
STRUCTURE_PATH = os.path.join(ROOT_DIR, 'pomegranate/structures/')

# Directory for saved graph files.
SAVED_GRAPHS_DIR = os.path.join(ROOT_DIR, "graphs")

# Directory of example PDB files.
SAVED_PDB_DIR = os.path.join(ROOT_DIR, 'examples/pdbs/')

# 'human' sub-directory of STRUCTURE_PATH.
STRUCTURE_HUMAN_PATH = os.path.join(STRUCTURE_PATH, 'human')

# Embeddings file name and its full path under ROOT_DIR/embeddings.
EMBEDDINGS_FILENAME = "embeddings_output.csv"  # previously 'embeddings.csv'
EMBEDDINGS_PATH = os.path.join(ROOT_DIR, 'embeddings', EMBEDDINGS_FILENAME)
66 changes: 16 additions & 50 deletions pomegranate/json_conversion/graphein_to_json.py
Original file line number Diff line number Diff line change
@@ -1,54 +1,7 @@
import graphein.protein as gp
<<<<<<< HEAD
from graphein.testing import graphs_isomorphic
import networkx.readwrite as nx
import json
# import sys # For cmd line debugging


def _jsonable(value, allow_iterable=False):
    """Return a JSON-friendlier form of *value*, or *value* itself.

    Tries ``value.to_json()`` and then ``value.tolist()``. When
    *allow_iterable* is true, a final fallback turns any non-string iterable
    into a ``list``. Values none of these apply to are returned unchanged.
    """
    for method_name in ("to_json", "tolist"):
        try:
            return getattr(value, method_name)()
        except AttributeError:
            continue

    # Strings are already JSON-serialisable; list("abc") would explode them
    # into single characters.
    if allow_iterable and not isinstance(value, str):
        try:
            return list(value)
        except (AttributeError, TypeError):
            # list() raises TypeError on non-iterables such as int/float.
            pass
    return value


def _jsonify_attrs(attrs, allow_iterable=False):
    """Replace every value of the mapping *attrs* in place via ``_jsonable``."""
    for key, value in attrs.items():
        # Only existing keys are reassigned, so the dict size never changes
        # while it is being iterated.
        attrs[key] = _jsonable(value, allow_iterable)


def g_to_json(g):
    """Convert graph *g* to node-link JSON data.

    The graph is modified in place: its ``"config"`` graph attribute is
    removed (it is not easily serialisable) and graph-, node- and edge-level
    attribute values are converted with ``to_json()`` / ``tolist()`` where
    available. Edge attributes additionally fall back to ``list(...)``.

    :param g: graph exposing ``graph``, ``nodes(data=True)`` and
        ``edges(data=True)`` (presumably a networkx graph -- confirm).
    :return: the result of ``nx.json_graph.node_link_data(g)``.
    """
    # pop() rather than del: a graph without a config no longer raises KeyError.
    g.graph.pop("config", None)

    _jsonify_attrs(g.graph)

    for _, node_attrs in g.nodes(data=True):
        _jsonify_attrs(node_attrs)

    for _, _, edge_attrs in g.edges(data=True):
        _jsonify_attrs(edge_attrs, allow_iterable=True)

    return nx.json_graph.node_link_data(g)
=======
import networkx.readwrite as nx
import pandas as pd
import numpy as np
import json

from definitions import SAVED_GRAPHS_DIR
Expand Down Expand Up @@ -107,7 +60,6 @@ def g_to_json(g, prot_id, db_name='PDB'):
f.write(json.dumps(tmp))

return j_graph
>>>>>>> 699e6d4481baa3c2382dc4ff8a01d0654ff7e642
# print(json.dumps(j_graph))
# # Write the graph to a JSON file
# with open("test.json", 'w') as f:
Expand All @@ -123,6 +75,20 @@ def g_to_json(g, prot_id, db_name='PDB'):

# assert graphs_isomorphic(g, h), "Graphs are not isomorphic"

'''
Get graph back from json
'''
def load_prot_graph(json_graph):
    """Rebuild a graph from its JSON string.

    :param json_graph: JSON text that ``json.loads`` can parse into node-link
        data accepted by ``nx.json_graph.node_link_graph``.
    :return: the graph, with ``graph["pdb_df"]`` parsed by ``pandas.read_json``
        and ``graph["coords"]`` converted to a numpy array.
    """
    # Local import so this function does not depend on the module's import block.
    from io import StringIO

    # Load general graph
    g = nx.json_graph.node_link_graph(json.loads(json_graph))

    # pandas deprecated passing literal JSON text to read_json; wrap string
    # payloads in a file-like object instead.
    pdb_df_source = g.graph["pdb_df"]
    if isinstance(pdb_df_source, str):
        pdb_df_source = StringIO(pdb_df_source)

    # Convert specific fields back from their serialised form
    g.graph["pdb_df"] = pd.read_json(pdb_df_source)
    g.graph["coords"] = np.array(g.graph["coords"])

    return g

'''
DEBUGGING
'''
Expand Down
64 changes: 64 additions & 0 deletions pomegranate/protein/interactions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
import networkx as nx

from graphein.protein.utils import filter_dataframe
from graphein.protein.edges.distance import compute_distmat, get_interacting_atoms


'''
From graphein.protein.edges.distance
Modified by Naomi Warren
'''
def add_distance_threshold(
    G: nx.Graph, long_interaction_threshold: int, threshold: float = 5.0
):
    """
    Add ``distance_threshold`` edges between nodes that lie close together.

    Candidate pairs come from ``get_interacting_atoms`` applied to the
    distance matrix of the graph's nodes. A pair is skipped when it is a
    self-pair, or when both nodes are on the same chain and their residue
    numbers differ by less than ``long_interaction_threshold``. For an
    existing edge, ``"distance_threshold"`` is added to its ``kind`` set;
    otherwise a new edge is created with that kind.

    :param G: Protein structure graph to add distance edges to; must carry
        ``G.graph["pdb_df"]``
    :type G: nx.Graph
    :param long_interaction_threshold: minimum separation in sequence for two
        nodes on the same chain to be connected
    :type long_interaction_threshold: int
    :param threshold: Distance cut-off passed to ``get_interacting_atoms``
    :type threshold: float
    :return: The same graph, with distance-based edges added
    """
    node_df = filter_dataframe(
        G.graph["pdb_df"], "node_id", list(G.nodes()), True
    )
    close_pairs = get_interacting_atoms(
        threshold, distmat=compute_distmat(node_df)
    )

    for row_a, row_b in zip(close_pairs[0], close_pairs[1]):
        # Don't bother adding self-loops
        if row_a == row_b:
            continue

        node_a = node_df.at[row_a, "node_id"]
        node_b = node_df.at[row_b, "node_id"]
        same_chain = (
            node_df.at[row_a, "chain_id"] == node_df.at[row_b, "chain_id"]
        )
        sequence_gap = abs(
            node_df.at[row_a, "residue_number"]
            - node_df.at[row_b, "residue_number"]
        )

        # Same-chain neighbours that are too close in sequence are not linked.
        if same_chain and sequence_gap < long_interaction_threshold:
            continue

        if G.has_edge(node_a, node_b):
            G.edges[node_a, node_b]["kind"].add("distance_threshold")
        else:
            G.add_edge(node_a, node_b, kind={"distance_threshold"})

    return G
Loading

0 comments on commit e9aba60

Please sign in to comment.