Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix Operations.reverse() to not add non-deterministic dead states #14212

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -129,8 +129,14 @@ public Transition[][] getSortedTransitions() {
return transitions;
}

/** Returns accept states. If the bit is set then that state is an accept state. */
BitSet getAcceptStates() {
/**
* Returns accept states. If the bit is set then that state is an accept state.
*
* <p>expert: Use {@link #isAccept(int)} instead, unless you really need to scan bits.
*
* @lucene.internal This method signature may change in the future
*/
public BitSet getAcceptStates() {
return isAccept;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.lucene.internal.hppc.BitMixer;
import org.apache.lucene.internal.hppc.IntCursor;
import org.apache.lucene.internal.hppc.IntHashSet;
Expand Down Expand Up @@ -969,7 +968,7 @@ private static BitSet getLiveStatesFromInitial(Automaton a) {
private static BitSet getLiveStatesToAccept(Automaton a) {
Automaton.Builder builder = new Automaton.Builder();

// NOTE: not quite the same thing as what SpecialOperations.reverse does:
// NOTE: not quite the same thing as what reverse() does:
Transition t = new Transition();
int numStates = a.getNumStates();
for (int s = 0; s < numStates; s++) {
Expand Down Expand Up @@ -1177,7 +1176,7 @@ public static IntsRef getSingleton(Automaton a) {
*/
public static BytesRef getCommonSuffixBytesRef(Automaton a) {
// reverse the language of the automaton, then reverse its common prefix.
Automaton r = removeDeadStates(reverse(a));
Automaton r = reverse(a);
BytesRef ref = getCommonPrefixBytesRef(r);
reverseBytes(ref);
return ref;
Expand All @@ -1195,12 +1194,6 @@ private static void reverseBytes(BytesRef ref) {

/** Returns an automaton accepting the reverse language. */
public static Automaton reverse(Automaton a) {
return reverse(a, null);
}

/** Reverses the automaton, returning the new initial states. */
public static Automaton reverse(Automaton a, Set<Integer> initialStates) {

if (Operations.isEmpty(a)) {
return new Automaton();
}
Expand Down Expand Up @@ -1236,15 +1229,12 @@ public static Automaton reverse(Automaton a, Set<Integer> initialStates) {
BitSet acceptStates = a.getAcceptStates();
while (s < numStates && (s = acceptStates.nextSetBit(s)) != -1) {
result.addEpsilon(0, s + 1);
if (initialStates != null) {
initialStates.add(s + 1);
}
s++;
}

result.finishState();

return result;
return removeDeadStates(result);
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -137,30 +137,6 @@ public void testCommonPrefixTrailingKleenStar() throws Exception {
assertEquals("boo", Operations.getCommonPrefix(a));
}

public void testCommonPrefixDeadStates() throws Exception {
Automaton a = Operations.concatenate(Automata.makeAnyString(), Automata.makeString("boo"));
// reverse it twice, to create some dead states
// TODO: is it possible to fix reverse() to not create dead states?!
Automaton withDeadStates = Operations.reverse(Operations.reverse(a));
IllegalArgumentException expected =
expectThrows(
IllegalArgumentException.class,
() -> {
Operations.getCommonPrefix(withDeadStates);
});
assertEquals("input automaton has dead states", expected.getMessage());
}

public void testCommonPrefixRemoveDeadStates() throws Exception {
Automaton a = Operations.concatenate(Automata.makeAnyString(), Automata.makeString("boo"));
// reverse it twice, to create some dead states
// TODO: is it possible to fix reverse() to not create dead states?!
Automaton withDeadStates = Operations.reverse(Operations.reverse(a));
// now remove the deadstates
Automaton withoutDeadStates = Operations.removeDeadStates(withDeadStates);
assertEquals("", Operations.getCommonPrefix(withoutDeadStates));
}

public void testCommonPrefixOptional() throws Exception {
Automaton a = new Automaton();
int init = a.createState();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -329,12 +329,64 @@ public static Automaton randomAutomaton(Random random) {
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

/**
* Original brics implementation of reverse(). It tries to satisfy multiple use-cases by
* populating a set of initial states too.
*/
public static Automaton reverseOriginal(Automaton a, Set<Integer> initialStates) {

if (Operations.isEmpty(a)) {
return new Automaton();
}

int numStates = a.getNumStates();

// Build a new automaton with all edges reversed
Automaton.Builder builder = new Automaton.Builder();

// Initial node; we'll add epsilon transitions in the end:
builder.createState();

for (int s = 0; s < numStates; s++) {
builder.createState();
}

// Old initial state becomes new accept state:
builder.setAccept(1, true);

Transition t = new Transition();
for (int s = 0; s < numStates; s++) {
int numTransitions = a.getNumTransitions(s);
a.initTransition(s, t);
for (int i = 0; i < numTransitions; i++) {
a.getNextTransition(t);
builder.addTransition(t.dest + 1, s + 1, t.min, t.max);
}
}

Automaton result = builder.finish();

int s = 0;
BitSet acceptStates = a.getAcceptStates();
while (s < numStates && (s = acceptStates.nextSetBit(s)) != -1) {
result.addEpsilon(0, s + 1);
if (initialStates != null) {
initialStates.add(s + 1);
}
s++;
}

result.finishState();

return result;
}

/** Simple, original brics implementation of Brzozowski minimize() */
public static Automaton minimizeSimple(Automaton a) {
Set<Integer> initialSet = new HashSet<Integer>();
a = determinizeSimple(Operations.reverse(a, initialSet), initialSet);
a = determinizeSimple(reverseOriginal(a, initialSet), initialSet);
initialSet.clear();
a = determinizeSimple(Operations.reverse(a, initialSet), initialSet);
a = determinizeSimple(reverseOriginal(a, initialSet), initialSet);
return a;
}

Expand Down