Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Replace pygraphviz with neo4j-viz for graph visualization #306

Open
wants to merge 10 commits into
base: main
Choose a base branch
from
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,11 @@

## Next

### Changed

- Replaced PyGraphviz with neo4j-viz for pipeline visualization, eliminating the need for system-level dependencies.


## 1.6.0

### Added
Expand Down
5 changes: 3 additions & 2 deletions examples/customize/build_graph/pipeline/visualization.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,5 +54,6 @@ async def run(self, number: IntDataModel) -> IntDataModel:
pipe.connect("times_two", "addition", {"a": "times_two.value"})
pipe.connect("times_ten", "addition", {"b": "times_ten.value"})
pipe.connect("addition", "save", {"number": "addition"})
pipe.draw("graph.png")
pipe.draw("graph_full.png", hide_unused_outputs=False)
# Save as HTML files for interactive visualization
pipe.draw("graph.html")
pipe.draw("graph_full.html", hide_unused_outputs=False)
397 changes: 343 additions & 54 deletions poetry.lock

Large diffs are not rendered by default.

9 changes: 3 additions & 6 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -38,10 +38,7 @@ pyyaml = "^6.0.2"
types-pyyaml = "^6.0.12.20240917"
# optional deps
langchain-text-splitters = {version = "^0.3.0", optional = true }
pygraphviz = [
{version = "^1.13.0", python = ">=3.10,<4.0.0", optional = true},
{version = "^1.0.0", python = "<3.10", optional = true}
]
neo4j-viz = {version = "^0.1.0"}
weaviate-client = {version = "^4.6.1", optional = true }
pinecone-client = {version = "^4.1.0", optional = true }
google-cloud-aiplatform = {version = "^1.66.0", optional = true }
Expand Down Expand Up @@ -77,9 +74,9 @@ ollama = ["ollama"]
openai = ["openai"]
mistralai = ["mistralai"]
qdrant = ["qdrant-client"]
kg_creation_tools = ["pygraphviz"]
kg_creation_tools = ["neo4j-viz"]
sentence-transformers = ["sentence-transformers"]
experimental = ["langchain-text-splitters", "pygraphviz", "llama-index"]
experimental = ["langchain-text-splitters", "neo4j-viz", "llama-index"]
examples = ["langchain-openai", "langchain-huggingface"]

[build-system]
Expand Down
2 changes: 1 addition & 1 deletion src/neo4j_graphrag/embeddings/cohere.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
try:
import cohere
except ImportError:
cohere = None # type: ignore
cohere = None # type: ignore[assignment]


class CohereEmbeddings(Embedder):
Expand Down
4 changes: 3 additions & 1 deletion src/neo4j_graphrag/embeddings/mistral.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,9 @@
try:
from mistralai import Mistral
except ImportError:
Mistral = None # type: ignore
# Define placeholder type for type checking
class Mistral: # type: ignore
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this better compared to the previous approach? (out of curiosity)

pass


class MistralAIEmbeddings(Embedder):
Expand Down
142 changes: 113 additions & 29 deletions src/neo4j_graphrag/experimental/pipeline/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,16 @@
from neo4j_graphrag.utils.logging import prettify

try:
import pygraphviz as pgv
from neo4j_viz import (
Node,
Relationship,
VisualizationGraph as NeoVizGraph,
CaptionAlignment,
)

HAS_NEO4J_VIZ = True
except ImportError:
pgv = None
HAS_NEO4J_VIZ = False

from pydantic import BaseModel

Expand Down Expand Up @@ -182,40 +189,100 @@ def show_as_dict(self) -> dict[str, Any]:
return pipeline_config.model_dump()

def draw(
self, path: str, layout: str = "dot", hide_unused_outputs: bool = True
self, path: str, layout: str = "force", hide_unused_outputs: bool = True
) -> Any:
G = self.get_pygraphviz_graph(hide_unused_outputs)
G.layout(layout)
G.draw(path)
"""Draw the pipeline graph using neo4j-viz.

def get_pygraphviz_graph(self, hide_unused_outputs: bool = True) -> pgv.AGraph:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't remember why we haven't made this method private, but since we are removing it we should mention it in the changelog.

if pgv is None:
Args:
path (str): Path to save the visualization. If the path ends with .html, it will save an HTML file.
Otherwise, it will save a PNG image.
layout (str): Layout algorithm to use. Default is "force".
hide_unused_outputs (bool): Whether to hide unused outputs. Default is True.

Returns:
Any: The visualization object.
"""
G = self.get_neo4j_viz_graph(hide_unused_outputs)
if path.endswith(".html"):
# Save as HTML file
with open(path, "w") as f:
f.write(G.render()._repr_html_())
else:
# For other formats, we'll use the render method and save the image
G.render()
# Note: neo4j-viz doesn't support direct saving to image formats
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is the comment saying that it can't save to PNG? I ask because the code in the 'else' branch looks almost identical to the code in the preceding 'if' block.

# If image format is needed, consider using a screenshot or other methods
with open(path, "w") as f:
f.write(G.render()._repr_html_())

def get_neo4j_viz_graph(self, hide_unused_outputs: bool = True) -> NeoVizGraph:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And maybe make this method private?

"""Create a neo4j-viz visualization graph from the pipeline.

Args:
hide_unused_outputs (bool): Whether to hide unused outputs. Default is True.

Returns:
NeoVizGraph: The neo4j-viz visualization graph.
"""
if not HAS_NEO4J_VIZ:
raise ImportError(
"Could not import pygraphviz. "
"Follow installation instruction in pygraphviz documentation "
"to get it up and running on your system."
"Could not import neo4j-viz. "
"Install it with 'pip install neo4j-viz'."
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think pointing users to the extras group instead would reduce dependency issues a bit:

Suggested change
"Install it with 'pip install neo4j-viz'."
"Install it with 'pip install \"neo4j-graphrag[experimental]\"'."

)
self.validate_parameter_mapping()
G = pgv.AGraph(strict=False, directed=True)
# create a node for each component

nodes = []
relationships = []
node_ids = {}
node_counter = 0

# Create nodes for each component
for n, node in self._nodes.items():
comp_inputs = ",".join(
f"{i}: {d['annotation']}"
for i, d in node.component.component_inputs.items()
)
G.add_node(
n,
node_type="component",
shape="rectangle",
label=f"{node.component.__class__.__name__}: {n}({comp_inputs})",
node_ids[n] = node_counter
nodes.append(
Node(
id=node_counter,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

According to the doc, it looks like id can be a string? That could simplify the id and node matching part.

caption=f"{node.component.__class__.__name__}: {n}({comp_inputs})",
size=20, # Component nodes are larger
color="#4C8BF5", # Blue for component nodes
caption_alignment=CaptionAlignment.CENTER,
caption_size=3,
)
)
# create a node for each output field and connect them it to its component
node_counter += 1

# Create nodes for each output field
for o in node.component.component_outputs:
param_node_name = f"{n}.{o}"
G.add_node(param_node_name, label=o, node_type="output")
G.add_edge(n, param_node_name)
# then we create the edges between a component output
# and the component it gets added to
node_ids[param_node_name] = node_counter
nodes.append(
Node(
id=node_counter,
caption=o,
size=10, # Output nodes are smaller
color="#34A853", # Green for output nodes
caption_alignment=CaptionAlignment.CENTER,
caption_size=3,
)
)
# Connect component to its output
relationships.append(
Relationship(
source=node_ids[n],
target=node_ids[param_node_name],
caption="",
caption_align=CaptionAlignment.CENTER,
caption_size=10,
color="#000000",
)
)
node_counter += 1

# Create edges between components and their inputs
for component_name, params in self.param_mapping.items():
for param, mapping in params.items():
source_component = mapping["component"]
Expand All @@ -224,13 +291,30 @@ def get_pygraphviz_graph(self, hide_unused_outputs: bool = True) -> pgv.AGraph:
source_output_node = f"{source_component}.{source_param_name}"
else:
source_output_node = source_component
G.add_edge(source_output_node, component_name, label=param)
# remove outputs that are not mapped

if source_output_node in node_ids and component_name in node_ids:
relationships.append(
Relationship(
source=node_ids[source_output_node],
target=node_ids[component_name],
caption=param,
color="#EA4335", # Red for parameter connections
caption_align=CaptionAlignment.CENTER,
caption_size=10,
)
)

# Filter unused outputs if requested
if hide_unused_outputs:
for n in G.nodes():
if n.attr["node_type"] == "output" and G.out_degree(n) == 0: # type: ignore
G.remove_node(n)
return G
used_nodes = set()
for rel in relationships:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think there is a logic issue here: we do not want to filter out isolated nodes (which should not occur anyway, I think), but rather nodes of type "output" that have no outgoing relationship.

used_nodes.add(rel.source)
used_nodes.add(rel.target)

filtered_nodes = [node for node in nodes if node.id in used_nodes]
return NeoVizGraph(nodes=filtered_nodes, relationships=relationships)

return NeoVizGraph(nodes=nodes, relationships=relationships)

def add_component(self, component: Component, name: str) -> None:
"""Add a new component. Components are uniquely identified
Expand Down
14 changes: 9 additions & 5 deletions src/neo4j_graphrag/llm/anthropic_llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ def get_messages(
raise LLMGenerationError(e.errors()) from e
messages.extend(cast(Iterable[dict[str, Any]], message_history))
messages.append(UserMessage(content=input).model_dump())
return messages # type: ignore
return messages # type: ignore[return-value]

def invoke(
self,
Expand All @@ -112,8 +112,10 @@ def invoke(
"""
try:
if isinstance(message_history, MessageHistory):
message_history = message_history.messages
messages = self.get_messages(input, message_history)
message_history.add_message(LLMMessage(role="user", content=input))
messages = message_history
else:
messages = self.get_messages(input, message_history)
response = self.client.messages.create(
model=self.model_name,
system=system_instruction or self.anthropic.NOT_GIVEN,
Expand Down Expand Up @@ -148,8 +150,10 @@ async def ainvoke(
"""
try:
if isinstance(message_history, MessageHistory):
message_history = message_history.messages
messages = self.get_messages(input, message_history)
message_history.add_message(LLMMessage(role="user", content=input))
messages = message_history
else:
messages = self.get_messages(input, message_history)
response = await self.async_client.messages.create(
model=self.model_name,
system=system_instruction or self.anthropic.NOT_GIVEN,
Expand Down
2 changes: 1 addition & 1 deletion src/neo4j_graphrag/llm/cohere_llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ def get_messages(
raise LLMGenerationError(e.errors()) from e
messages.extend(cast(Iterable[dict[str, Any]], message_history))
messages.append(UserMessage(content=input).model_dump())
return messages # type: ignore
return messages # type: ignore[return-value]

def invoke(
self,
Expand Down
10 changes: 8 additions & 2 deletions src/neo4j_graphrag/llm/mistralai_llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,14 @@
from mistralai import Messages, Mistral
from mistralai.models.sdkerror import SDKError
except ImportError:
Mistral = None # type: ignore
SDKError = None # type: ignore
# Define placeholder types for type checking
Messages = None # type: ignore

class Mistral: # type: ignore
pass

class SDKError(Exception): # type: ignore
pass


class MistralAILLM(LLMInterface):
Expand Down
2 changes: 1 addition & 1 deletion src/neo4j_graphrag/llm/ollama_llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ def get_messages(
raise LLMGenerationError(e.errors()) from e
messages.extend(cast(Iterable[dict[str, Any]], message_history))
messages.append(UserMessage(content=input).model_dump())
return messages # type: ignore
return messages # type: ignore[return-value]

def invoke(
self,
Expand Down
33 changes: 21 additions & 12 deletions tests/unit/experimental/pipeline/test_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -379,39 +379,48 @@ async def test_pipeline_async() -> None:
assert pipeline_result[1].result == {"add": {"result": 12}}


def test_pipeline_to_pgv() -> None:
def test_pipeline_to_neo4j_viz() -> None:
pipe = Pipeline()
component_a = ComponentAdd()
component_b = ComponentMultiply()
pipe.add_component(component_a, "a")
pipe.add_component(component_b, "b")
pipe.connect("a", "b", {"number1": "a.result"})
g = pipe.get_pygraphviz_graph()
# 3 nodes:
g = pipe.get_neo4j_viz_graph()
# 4 nodes:
# - 2 components 'a' and 'b'
# - 1 output 'a.result'
assert len(g.nodes()) == 3
g = pipe.get_pygraphviz_graph(hide_unused_outputs=False)
# - 2 outputs 'a.result' and 'b.result' (neo4j-viz implementation includes both)
assert len(g.nodes) == 4
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Related to my comment above about logic, the test should not need to be updated.


# Count component nodes
component_nodes = [node for node in g.nodes if node.size == 20]
assert len(component_nodes) == 2

# Count output nodes
output_nodes = [node for node in g.nodes if node.size == 10]
assert len(output_nodes) == 2

g = pipe.get_neo4j_viz_graph(hide_unused_outputs=False)
# 4 nodes:
# - 2 components 'a' and 'b'
# - 2 output 'a.result' and 'b.result'
assert len(g.nodes()) == 4
# - 2 outputs 'a.result' and 'b.result'
assert len(g.nodes) == 4


def test_pipeline_draw() -> None:
pipe = Pipeline()
pipe.add_component(ComponentAdd(), "add")
t = tempfile.NamedTemporaryFile()
t = tempfile.NamedTemporaryFile(suffix=".html")
pipe.draw(t.name)
content = t.file.read()
assert len(content) > 0


@patch("neo4j_graphrag.experimental.pipeline.pipeline.pgv", None)
def test_pipeline_draw_missing_pygraphviz_dep() -> None:
@patch("neo4j_graphrag.experimental.pipeline.pipeline.HAS_NEO4J_VIZ", False)
def test_pipeline_draw_missing_neo4j_viz_dep() -> None:
pipe = Pipeline()
pipe.add_component(ComponentAdd(), "add")
t = tempfile.NamedTemporaryFile()
t = tempfile.NamedTemporaryFile(suffix=".html")
with pytest.raises(ImportError):
pipe.draw(t.name)

Expand Down
Loading
Loading