15
15
import edu .stanford .nlp .semgraph .SemanticGraph ;
16
16
import edu .stanford .nlp .semgraph .SemanticGraphEdge ;
17
17
import edu .stanford .nlp .semgraph .semgrex .SemgrexMatcher ;
18
+ import edu .stanford .nlp .trees .GrammaticalRelation ;
19
+ import edu .stanford .nlp .util .Pair ;
18
20
19
21
/**
20
22
* Combines two words into one word
@@ -60,6 +62,31 @@ public String toEditString() {
60
62
return buf .toString ();
61
63
}
62
64
65
+ /**
66
+ * Test if two nodes have the same parents with the same relations.
67
+ * If so, then the two nodes can be treated as equivalent when merging nodes.
68
+ * Otherwise, since there are two different heads, we can't pick a node
69
+ * to treat as the head of the phrase, and we will have to abort
70
+ */
71
+ public static boolean hasSameParents (SemanticGraph sg , IndexedWord head , IndexedWord candidate , Set <IndexedWord > ignoreNodes ) {
72
+ Set <Pair <IndexedWord , GrammaticalRelation >> headParents = new HashSet <>();
73
+ Set <Pair <IndexedWord , GrammaticalRelation >> candidateParents = new HashSet <>();
74
+
75
+ for (SemanticGraphEdge edge : sg .incomingEdgeIterable (head )) {
76
+ // iterating all parents is relevant for enhanced graphs, for example
77
+ if (ignoreNodes .contains (edge .getGovernor ()))
78
+ continue ;
79
+ headParents .add (new Pair <>(edge .getGovernor (), edge .getRelation ()));
80
+ }
81
+ for (SemanticGraphEdge edge : sg .incomingEdgeIterable (candidate )) {
82
+ // iterating all parents is relevant for enhanced graphs, for example
83
+ if (ignoreNodes .contains (edge .getGovernor ()))
84
+ continue ;
85
+ candidateParents .add (new Pair <>(edge .getGovernor (), edge .getRelation ()));
86
+ }
87
+ return headParents .equals (candidateParents );
88
+ }
89
+
63
90
/**
64
91
* If the named nodes are next to each other, and the edges of
65
92
* the graph allow for it, squish those words into one word
@@ -76,6 +103,12 @@ public boolean evaluate(SemanticGraph sg, SemgrexMatcher sm) {
76
103
}
77
104
78
105
IndexedWord head = null ;
106
+ // Words who share the same parents will go in this set
107
+ // Therefore, we can later remap any edges going to that word
108
+ // to point to the chosen head instead
109
+ // This will let us process phrases where two words could have
110
+ // been the head and both have edges coming in to them
111
+ Set <IndexedWord > equivalentHeads = new HashSet <>();
79
112
for (IndexedWord candidate : nodeSet ) {
80
113
Set <IndexedWord > parents = sg .getParents (candidate );
81
114
if (parents .size () == 0 ) {
@@ -96,9 +129,10 @@ public boolean evaluate(SemanticGraph sg, SemgrexMatcher sm) {
96
129
// parent is outside this subtree
97
130
// therefore, we can use this word as the head of the subtree
98
131
if (head != null ) {
99
- if (parents . equals (sg . getParents ( head ) )) {
100
- // if the parents of the other node are the same, we can keep going
132
+ if (hasSameParents (sg , head , candidate , nodeSet )) {
133
+ // if the parents *and relations* of the other node are the same, we can keep going
101
134
// since the nodes are about to merge anyway
135
+ equivalentHeads .add (candidate );
102
136
break ;
103
137
} else {
104
138
// if we already have a head with different parents, give up instead
@@ -114,18 +148,36 @@ public boolean evaluate(SemanticGraph sg, SemgrexMatcher sm) {
114
148
}
115
149
116
150
// for now, only allow the head to have edges to children outside the subtree
117
- // TODO: instead, could make them all point to the new merged word...
118
- // but it's not clear that's a structure we want to allow merged
151
+ // also, words with the same parents as the new head can have outgoing edges
152
+ // TODO: not clear we want to allow other words with different
153
+ // heads to be merged in this manner
154
+ List <SemanticGraphEdge > reattachEdges = new ArrayList <>();
119
155
for (IndexedWord candidate : nodeSet ) {
120
156
if (candidate == head ) {
121
157
continue ;
122
158
}
123
- for (IndexedWord child : sg .getChildren (candidate )) {
124
- if (!nodeSet .contains (child )) {
125
- return false ;
159
+ for (SemanticGraphEdge edge : sg .outgoingEdgeIterable (candidate )) {
160
+ IndexedWord gov = edge .getGovernor ();
161
+ if (gov != candidate ) {
162
+ throw new AssertionError ();
163
+ }
164
+ IndexedWord dep = edge .getDependent ();
165
+ if (!nodeSet .contains (dep )) {
166
+ if (equivalentHeads .contains (candidate )) {
167
+ reattachEdges .add (edge );
168
+ } else {
169
+ return false ;
170
+ }
126
171
}
127
172
}
128
173
}
174
+
175
+ // at this point, everything checks out and we can start manipulating the graph
176
+ // we will start by reattaching incoming edges to the chosen head
177
+ for (SemanticGraphEdge edge : reattachEdges ) {
178
+ ReattachNamedEdge .reattachEdge (sg , sm , edge , null , head , edge .getDependent ());
179
+ }
180
+
129
181
ArrayList <IndexedWord > nodes = new ArrayList <>(nodeSet );
130
182
Collections .sort (nodes );
131
183
0 commit comments