49
49
from neo4j_graphrag .schema import get_structured_schema
50
50
51
51
52
+ logger = logging .getLogger (__name__ )
53
+
54
+
52
55
class PropertyType (BaseModel ):
53
56
"""
54
57
Represents a property on a node or relationship in the graph.
@@ -622,19 +625,19 @@ async def run(self, text: str, examples: str = "", **kwargs: Any) -> GraphSchema
622
625
class SchemaFromExistingGraphExtractor (BaseSchemaBuilder ):
623
626
"""A class to build a GraphSchema object from an existing graph.
624
627
625
- Uses the get_structured_schema function to extract existing node labels,
626
- relationship types, properties and existence constraints.
628
+ Uses the get_structured_schema function to extract existing node labels,
629
+ relationship types, properties and existence constraints.
627
630
628
- By default, the built schema does not allow any additional item (property,
629
- node label, relationship type or pattern).
631
+ By default, the built schema does not allow any additional item (property,
632
+ node label, relationship type or pattern).
630
633
631
- Args:
632
- driver (neo4j.Driver): connection to the neo4j database.
633
- additional_properties (bool, default False): see GraphSchema
634
- additional_node_types (bool, default False): see GraphSchema
635
- additional_relationship_types (bool, default False): see GraphSchema:
636
- additional_patterns (bool, default False): see GraphSchema:
637
- neo4j_database (Optional | str): name of the neo4j database to use
634
+ Args:
635
+ driver (neo4j.Driver): connection to the neo4j database.
636
+ additional_properties (bool, default False): see GraphSchema
637
+ additional_node_types (bool, default False): see GraphSchema
638
+ additional_relationship_types (bool, default False): see GraphSchema
639
+ additional_patterns (bool, default False): see GraphSchema
640
+ neo4j_database (Optional[str]): name of the neo4j database to use
638
641
"""
639
642
640
643
def __init__ (
@@ -672,7 +675,7 @@ def _extract_required_properties(
672
675
"""
673
676
schema_metadata = structured_schema .get ("metadata" , {})
674
677
existence_constraint = [] # list of (node label, property name)
675
- for constraint in schema_metadata .get ("constraints " , []):
678
+ for constraint in schema_metadata .get ("constraint " , []):
676
679
if constraint ["type" ] in (
677
680
"NODE_PROPERTY_EXISTENCE" ,
678
681
"NODE_KEY" ,
@@ -688,10 +691,11 @@ def _extract_required_properties(
688
691
existence_constraint .append ((lab , prop ))
689
692
return existence_constraint
690
693
691
- async def run (self ) -> GraphSchema :
694
+ async def run (self , * args , ** kwargs ) -> GraphSchema :
692
695
structured_schema = get_structured_schema (self .driver , database = self .database )
693
696
existence_constraint = self ._extract_required_properties (structured_schema )
694
697
698
+ # node label with properties
695
699
node_labels = set (structured_schema ["node_props" ].keys ())
696
700
node_types = [
697
701
{
@@ -708,6 +712,8 @@ async def run(self) -> GraphSchema:
708
712
}
709
713
for key , properties in structured_schema ["node_props" ].items ()
710
714
]
715
+
716
+ # relationships with properties
711
717
rel_labels = set (structured_schema ["rel_props" ].keys ())
712
718
relationship_types = [
713
719
{
@@ -723,27 +729,41 @@ async def run(self) -> GraphSchema:
723
729
}
724
730
for key , properties in structured_schema ["rel_props" ].items ()
725
731
]
732
+
726
733
patterns = [
727
734
(s ["start" ], s ["type" ], s ["end" ])
728
735
for s in structured_schema ["relationships" ]
729
736
]
737
+
730
738
# deal with nodes and relationships without properties
731
739
for source , rel , target in patterns :
732
740
if source not in node_labels :
741
+ if not self .additional_properties :
742
+ logger .warning (
743
+ f"SCHEMA: found node label { source } without property and additional_properties=False: this node label will always be pruned!"
744
+ )
733
745
node_labels .add (source )
734
746
node_types .append (
735
747
{
736
748
"label" : source ,
737
749
}
738
750
)
739
751
if target not in node_labels :
752
+ if not self .additional_properties :
753
+ logger .warning (
754
+ f"SCHEMA: found node label { target } without property and additional_properties=False: this node label will always be pruned!"
755
+ )
740
756
node_labels .add (target )
741
757
node_types .append (
742
758
{
743
759
"label" : target ,
744
760
}
745
761
)
746
762
if rel not in rel_labels :
763
+ if not self .additional_properties :
764
+ logger .warning (
765
+ f"SCHEMA: found relationship type { rel } without property and additional_properties=False: this relationship type will always be pruned!"
766
+ )
747
767
rel_labels .add (rel )
748
768
relationship_types .append (
749
769
{
0 commit comments