Skip to content

Commit 4f915f1

Browse files
committed
Add the capacity for MergeNodes to merge nodes with the same head. Useful for a case we found with two parts of a time that was too tokenized were pointing to the same head, rather than the time being one self-contained phrase
1 parent 120dbba commit 4f915f1

File tree

3 files changed

+152
-40
lines changed

3 files changed

+152
-40
lines changed

src/edu/stanford/nlp/semgraph/semgrex/ssurgeon/MergeNodes.java

Lines changed: 59 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@
1515
import edu.stanford.nlp.semgraph.SemanticGraph;
1616
import edu.stanford.nlp.semgraph.SemanticGraphEdge;
1717
import edu.stanford.nlp.semgraph.semgrex.SemgrexMatcher;
18+
import edu.stanford.nlp.trees.GrammaticalRelation;
19+
import edu.stanford.nlp.util.Pair;
1820

1921
/**
2022
* Combines two words into one word
@@ -60,6 +62,31 @@ public String toEditString() {
6062
return buf.toString();
6163
}
6264

65+
/**
66+
* Test if two nodes have the same parents with the same relations.
67+
* If so, then the two nodes can be treated as equivalent when merging nodes.
68+
* Otherwise, since there are two different heads, we can't pick a node
69+
* to treat as the head of the phrase, and we will have to abort
70+
*/
71+
public static boolean hasSameParents(SemanticGraph sg, IndexedWord head, IndexedWord candidate, Set<IndexedWord> ignoreNodes) {
72+
Set<Pair<IndexedWord, GrammaticalRelation>> headParents = new HashSet<>();
73+
Set<Pair<IndexedWord, GrammaticalRelation>> candidateParents = new HashSet<>();
74+
75+
for (SemanticGraphEdge edge : sg.incomingEdgeIterable(head)) {
76+
// iterating all parents is relevant for enhanced graphs, for example
77+
if (ignoreNodes.contains(edge.getGovernor()))
78+
continue;
79+
headParents.add(new Pair<>(edge.getGovernor(), edge.getRelation()));
80+
}
81+
for (SemanticGraphEdge edge : sg.incomingEdgeIterable(candidate)) {
82+
// iterating all parents is relevant for enhanced graphs, for example
83+
if (ignoreNodes.contains(edge.getGovernor()))
84+
continue;
85+
candidateParents.add(new Pair<>(edge.getGovernor(), edge.getRelation()));
86+
}
87+
return headParents.equals(candidateParents);
88+
}
89+
6390
/**
6491
* If the named nodes are next to each other, and the edges of
6592
* the graph allow for it, squish those words into one word
@@ -76,6 +103,12 @@ public boolean evaluate(SemanticGraph sg, SemgrexMatcher sm) {
76103
}
77104

78105
IndexedWord head = null;
106+
// Words who share the same parents will go in this set
107+
// Therefore, we can later remap any edges going to that word
108+
// to point to the chosen head instead
109+
// This will let us process phrases where two words could have
110+
// been the head and both have edges coming in to them
111+
Set<IndexedWord> equivalentHeads = new HashSet<>();
79112
for (IndexedWord candidate : nodeSet) {
80113
Set<IndexedWord> parents = sg.getParents(candidate);
81114
if (parents.size() == 0) {
@@ -96,9 +129,10 @@ public boolean evaluate(SemanticGraph sg, SemgrexMatcher sm) {
96129
// parent is outside this subtree
97130
// therefore, we can use this word as the head of the subtree
98131
if (head != null) {
99-
if (parents.equals(sg.getParents(head))) {
100-
// if the parents of the other node are the same, we can keep going
132+
if (hasSameParents(sg, head, candidate, nodeSet)) {
133+
// if the parents *and relations* of the other node are the same, we can keep going
101134
// since the nodes are about to merge anyway
135+
equivalentHeads.add(candidate);
102136
break;
103137
} else {
104138
// if we already have a head with different parents, give up instead
@@ -114,18 +148,36 @@ public boolean evaluate(SemanticGraph sg, SemgrexMatcher sm) {
114148
}
115149

116150
// for now, only allow the head to have edges to children outside the subtree
117-
// TODO: instead, could make them all point to the new merged word...
118-
// but it's not clear that's a structure we want to allow merged
151+
// also, words with the same parents as the new head can have outgoing edges
152+
// TODO: not clear we want to allow other words with different
153+
// heads to be merged in this manner
154+
List<SemanticGraphEdge> reattachEdges = new ArrayList<>();
119155
for (IndexedWord candidate : nodeSet) {
120156
if (candidate == head) {
121157
continue;
122158
}
123-
for (IndexedWord child : sg.getChildren(candidate)) {
124-
if (!nodeSet.contains(child)) {
125-
return false;
159+
for (SemanticGraphEdge edge : sg.outgoingEdgeIterable(candidate)) {
160+
IndexedWord gov = edge.getGovernor();
161+
if (gov != candidate) {
162+
throw new AssertionError();
163+
}
164+
IndexedWord dep = edge.getDependent();
165+
if (!nodeSet.contains(dep)) {
166+
if (equivalentHeads.contains(candidate)) {
167+
reattachEdges.add(edge);
168+
} else {
169+
return false;
170+
}
126171
}
127172
}
128173
}
174+
175+
// at this point, everything checks out and we can start manipulating the graph
176+
// we will start by reattaching incoming edges to the chosen head
177+
for (SemanticGraphEdge edge : reattachEdges) {
178+
ReattachNamedEdge.reattachEdge(sg, sm, edge, null, head, edge.getDependent());
179+
}
180+
129181
ArrayList<IndexedWord> nodes = new ArrayList<>(nodeSet);
130182
Collections.sort(nodes);
131183

src/edu/stanford/nlp/semgraph/semgrex/ssurgeon/ReattachNamedEdge.java

Lines changed: 40 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,45 @@ public String toEditString() {
5959
return buf.toString();
6060
}
6161

62+
public static boolean reattachEdge(SemanticGraph sg, SemgrexMatcher sm,
63+
SemanticGraphEdge edge, String edgeName, IndexedWord gov, IndexedWord dep) {
64+
if (gov == edge.getSource() && dep == edge.getTarget()) {
65+
// we were asked to point the edge to the same nodes it already pointed to
66+
// nothing to do
67+
return false;
68+
}
69+
boolean success = sg.removeEdge(edge);
70+
if (!success) {
71+
// maybe it was already removed somehow by a previous operation
72+
return false;
73+
}
74+
final SemanticGraphEdge newEdge;
75+
found: {
76+
for (SemanticGraphEdge existingEdge : sg.getAllEdges(edge.getSource(), edge.getTarget())) {
77+
if (existingEdge.getRelation().equals(edge.getRelation())) {
78+
newEdge = existingEdge;
79+
break found;
80+
}
81+
}
82+
newEdge = new SemanticGraphEdge(gov,
83+
dep,
84+
edge.getRelation(),
85+
edge.getWeight(),
86+
edge.isExtra());
87+
sg.addEdge(newEdge);
88+
}
89+
// whether we recreated a new edge with the new relation,
90+
// or found an existing edge with the relation we wanted,
91+
// update the named edge in the SemgrexMatcher so future
92+
// iterations have the name connected to the edge
93+
// TODO: if an existing edge was clobbered, perhaps we need to
94+
// update anything that named it
95+
if (edgeName != null) {
96+
sm.putNamedEdge(edgeName, newEdge);
97+
}
98+
return true;
99+
}
100+
62101
/**
63102
* "Reattach" the named edge by removing it and then recreating it with the new gov and/or dep
64103
*/
@@ -69,39 +108,7 @@ public boolean evaluate(SemanticGraph sg, SemgrexMatcher sm) {
69108
if (edge != null) {
70109
final IndexedWord gov = (govNodeName != null) ? sm.getNode(govNodeName) : edge.getSource();
71110
final IndexedWord dep = (depNodeName != null) ? sm.getNode(depNodeName) : edge.getTarget();
72-
if (gov == edge.getSource() && dep == edge.getTarget()) {
73-
// we were asked to point the edge to the same nodes it already pointed to
74-
// nothing to do
75-
return false;
76-
}
77-
boolean success = sg.removeEdge(edge);
78-
if (!success) {
79-
// maybe it was already removed somehow by a previous operation
80-
return false;
81-
}
82-
final SemanticGraphEdge newEdge;
83-
found: {
84-
for (SemanticGraphEdge existingEdge : sg.getAllEdges(edge.getSource(), edge.getTarget())) {
85-
if (existingEdge.getRelation().equals(edge.getRelation())) {
86-
newEdge = existingEdge;
87-
break found;
88-
}
89-
}
90-
newEdge = new SemanticGraphEdge(gov,
91-
dep,
92-
edge.getRelation(),
93-
edge.getWeight(),
94-
edge.isExtra());
95-
sg.addEdge(newEdge);
96-
}
97-
// whether we recreated a new edge with the new relation,
98-
// or found an existing edge with the relation we wanted,
99-
// update the named edge in the SemgrexMatcher so future
100-
// iterations have the name connected to the edge
101-
// TODO: if an existing edge was clobbered, perhaps we need to
102-
// update anything that named it
103-
sm.putNamedEdge(edgeName, newEdge);
104-
return true;
111+
return reattachEdge(sg, sm, edge, edgeName, gov, dep);
105112
}
106113
return false;
107114
}

test/src/edu/stanford/nlp/semgraph/semgrex/ssurgeon/SsurgeonTest.java

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1271,6 +1271,59 @@ public void readXMLMergeNodesAttributes() {
12711271
assertEquals("bar", prof.lemma());
12721272
}
12731273

1274+
/**
1275+
* This test checks some results when there are two candidate head nodes
1276+
*/
1277+
@Test
1278+
public void readXMLMergeNodesTwoHeads() {
1279+
Ssurgeon inst = Ssurgeon.inst();
1280+
1281+
String merge = String.join(newline,
1282+
"<ssurgeon-pattern-list>",
1283+
" <ssurgeon-pattern>",
1284+
" <uid>38</uid>",
1285+
" <notes>Merge two nodes that should not have been split</notes>",
1286+
" <semgrex>" + XMLUtils.escapeXML("{word:foo}=first . {word:bar}=second") + "</semgrex>",
1287+
" <edit-list>mergeNodes -node first -node second</edit-list>",
1288+
" </ssurgeon-pattern>",
1289+
"</ssurgeon-pattern-list>");
1290+
List<SsurgeonPattern> patterns = inst.readFromString(merge);
1291+
assertEquals(patterns.size(), 1);
1292+
SsurgeonPattern mergeSsurgeon = patterns.get(0);
1293+
1294+
SemanticGraph sg = SemanticGraph.valueOf("[test-1 obj> foo-2 obj> bar-3]");
1295+
SemanticGraph newSG = mergeSsurgeon.iterate(sg).first;
1296+
SemanticGraph expected = SemanticGraph.valueOf("[test-1 obj> foobar-2]");
1297+
assertEquals(expected, newSG);
1298+
1299+
// this one should not have been edited, since the relations are different
1300+
sg = SemanticGraph.valueOf("[test-1 obj> foo-2 nsubj> bar-3]");
1301+
newSG = mergeSsurgeon.iterate(sg).first;
1302+
assertEquals(sg, newSG);
1303+
1304+
// this one should not have been edited, since the heads are different
1305+
sg = SemanticGraph.valueOf("[test-1 obj> [a-2 det> foo-4] nsubj> [b-3 det> bar-5]]");
1306+
newSG = mergeSsurgeon.iterate(sg).first;
1307+
assertEquals(sg, newSG);
1308+
1309+
// various configurations of two nodes with the same head and different children
1310+
// the children in these configurations should all be attached to the new merged node
1311+
sg = SemanticGraph.valueOf("[test-1 obj> [foo-2 nsubj> baz-4] obj> bar-3]");
1312+
newSG = mergeSsurgeon.iterate(sg).first;
1313+
expected = SemanticGraph.valueOf("[test-1 obj> [foobar-2 nsubj> baz-3]]");
1314+
assertEquals(expected, newSG);
1315+
1316+
sg = SemanticGraph.valueOf("[test-1 obj> [foo-2 nsubj> baz-4] obj> [bar-3 det> the-5]]");
1317+
newSG = mergeSsurgeon.iterate(sg).first;
1318+
expected = SemanticGraph.valueOf("[test-1 obj> [foobar-2 nsubj> baz-3 det> the-4]]");
1319+
assertEquals(expected, newSG);
1320+
1321+
sg = SemanticGraph.valueOf("[test-1 obj> foo-2 obj> [bar-3 det> the-4]]");
1322+
newSG = mergeSsurgeon.iterate(sg).first;
1323+
expected = SemanticGraph.valueOf("[test-1 obj> [foobar-2 det> the-3]]");
1324+
assertEquals(expected, newSG);
1325+
}
1326+
12741327
/**
12751328
* Test a basic case of two nodes that should be merged
12761329
*<br>

0 commit comments

Comments
 (0)