From 200f33feadf8f2b71d53821272074715922166a7 Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Thu, 6 Feb 2025 12:01:32 +0100 Subject: [PATCH 1/2] Automatically remove dead states from concatenated automata. Concatenating automata frequently creates dead states. This PR proposes that `Operations#concatenate` automatically remove these dead states. This has precedent: `Operations#repeat`, `Operations#union` and other automaton manipulation utilities also automatically remove dead states. --- .../src/java/org/apache/lucene/util/automaton/Operations.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lucene/core/src/java/org/apache/lucene/util/automaton/Operations.java b/lucene/core/src/java/org/apache/lucene/util/automaton/Operations.java index 7c2b164aa107..4f000d4fc9eb 100644 --- a/lucene/core/src/java/org/apache/lucene/util/automaton/Operations.java +++ b/lucene/core/src/java/org/apache/lucene/util/automaton/Operations.java @@ -149,7 +149,7 @@ public static Automaton concatenate(List l) { result.finishState(); - return result; + return removeDeadStates(result); } /** From 389788c5299d0146d9a90fc50341a716a9a34c95 Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Thu, 6 Feb 2025 13:10:25 +0100 Subject: [PATCH 2/2] fix test --- .../org/apache/lucene/util/automaton/TestAutomaton.java | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/lucene/core/src/test/org/apache/lucene/util/automaton/TestAutomaton.java b/lucene/core/src/test/org/apache/lucene/util/automaton/TestAutomaton.java index 3c7d6eea198a..5c587f93d1ba 100644 --- a/lucene/core/src/test/org/apache/lucene/util/automaton/TestAutomaton.java +++ b/lucene/core/src/test/org/apache/lucene/util/automaton/TestAutomaton.java @@ -667,8 +667,12 @@ public void testConcatenatePreservesDet() throws Exception { } public void testRemoveDeadStates() throws Exception { - Automaton a = - Operations.concatenate(Arrays.asList(Automata.makeString("x"), Automata.makeString("y"))); + 
Automaton a = new Automaton(); + a.createState(); + a.copy(Automata.makeString("xy")); + a.addEpsilon(0, 1); // State 1 is dead, nothing leads to it. + a.finishState(); + assertEquals(4, a.getNumStates()); a = Operations.removeDeadStates(a); assertEquals(3, a.getNumStates());