Skip to content

Commit

Permalink
Make Operations#union merge accept states that have no outgoing trans…
Browse files Browse the repository at this point in the history
…ition. (#14207)

This helps generate simpler automata, especially when these automata are later
combined through other operations such as `Operations#concat`.
  • Loading branch information
jpountz authored Feb 10, 2025
1 parent ad7ff1f commit 3208920
Show file tree
Hide file tree
Showing 2 changed files with 125 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -516,7 +516,7 @@ public static Automaton union(Collection<Automaton> list) {

result.finishState();

return removeDeadStates(result);
return mergeAcceptStatesWithNoTransition(removeDeadStates(result));
}

// Simple custom ArrayList<Transition>
Expand Down Expand Up @@ -1060,6 +1060,82 @@ public static Automaton removeDeadStates(Automaton a) {
return result;
}

/**
* Merge all accept states that don't have outgoing transitions to a single shared state. This is
* a subset of minimization that is much cheaper. This helper is useful because operations like
* concatenation need to connect accept states of an automaton with the start state of the next
* one, so having fewer accept states makes the produced automata simpler.
*/
static Automaton mergeAcceptStatesWithNoTransition(Automaton a) {
int numStates = a.getNumStates();

int numAcceptStatesWithNoTransition = 0;
int[] acceptStatesWithNoTransition = new int[0];

BitSet acceptStates = a.getAcceptStates();
for (int i = 0; i < numStates; ++i) {
if (acceptStates.get(i) && a.getNumTransitions(i) == 0) {
acceptStatesWithNoTransition =
ArrayUtil.grow(acceptStatesWithNoTransition, 1 + numAcceptStatesWithNoTransition);
acceptStatesWithNoTransition[numAcceptStatesWithNoTransition++] = i;
}
}

if (numAcceptStatesWithNoTransition <= 1) {
// No states to merge
return a;
}

// Shrink for simplicity.
acceptStatesWithNoTransition =
ArrayUtil.copyOfSubArray(acceptStatesWithNoTransition, 0, numAcceptStatesWithNoTransition);

// Now copy states, preserving accept states.
Automaton result = new Automaton();
for (int s = 0; s < numStates; s++) {
int remappedS = remap(s, acceptStatesWithNoTransition);
while (result.getNumStates() <= remappedS) {
result.createState();
}
if (acceptStates.get(s)) {
result.setAccept(remappedS, true);
}
}

// Now copy transitions, making sure to remap states.
Transition t = new Transition();
for (int s = 0; s < numStates; ++s) {
int remappedSource = remap(s, acceptStatesWithNoTransition);
int numTransitions = a.initTransition(s, t);
for (int j = 0; j < numTransitions; j++) {
a.getNextTransition(t);
int remappedDest = remap(t.dest, acceptStatesWithNoTransition);
result.addTransition(remappedSource, remappedDest, t.min, t.max);
}
}

result.finishState();
return result;
}

private static int remap(int s, int[] combinedStates) {
int idx = Arrays.binarySearch(combinedStates, s);
if (idx >= 0) {
// This state is part of the states that get combined, remap to the first one.
return combinedStates[0];
} else {
idx = -1 - idx;
if (idx <= 1) {
// There is either no combined state before the current state, or only the first one, which
// we're preserving: no renumbering needed.
return s;
} else {
// Subtract the number of states that get combined into the first combined state.
return s - (idx - 1);
}
}
}

/**
* Returns the longest string that is a prefix of all accepted strings and visits each state at
* most once. The automaton must not have dead states. If this automaton has already been
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -374,6 +374,54 @@ public void testRepeat() {
Operations.determinize(Operations.repeat(aOrAb), Integer.MAX_VALUE)));
}

public void testMergeAcceptStatesWithNoTransition() {
Automaton emptyLanguage = Automata.makeEmpty();
assertSame(emptyLanguage, Operations.mergeAcceptStatesWithNoTransition(emptyLanguage));

Automaton a = Automata.makeString("a");
assertSame(a, Operations.mergeAcceptStatesWithNoTransition(a));

// All accept states get combined
Automaton aOrC = new Automaton();
aOrC.createState();
aOrC.createState();
aOrC.createState();
aOrC.addTransition(0, 1, 'a');
aOrC.setAccept(1, true);
aOrC.addTransition(0, 2, 'c');
aOrC.setAccept(2, true);
Automaton aOrCSingleAcceptState = Operations.mergeAcceptStatesWithNoTransition(aOrC);
assertEquals(1, aOrCSingleAcceptState.getAcceptStates().cardinality());
assertTrue(AutomatonTestUtil.sameLanguage(aOrC, aOrCSingleAcceptState));

// Two accept states get combined, but not the 3rd one since it has an outgoing transition
Automaton aOrCOrXStar = new Automaton();
aOrCOrXStar.createState();
aOrCOrXStar.createState();
aOrCOrXStar.createState();
aOrCOrXStar.createState();
aOrCOrXStar.addTransition(0, 1, 'a');
aOrCOrXStar.setAccept(1, true);
aOrCOrXStar.addTransition(0, 2, 'c');
aOrCOrXStar.setAccept(2, true);
aOrCOrXStar.addTransition(0, 3, 'x');
aOrCOrXStar.addTransition(3, 3, 'x');
aOrCOrXStar.setAccept(3, true);
Automaton aOrCOrXStarSingleAcceptState =
Operations.mergeAcceptStatesWithNoTransition(aOrCOrXStar);
assertEquals(2, aOrCOrXStarSingleAcceptState.getAcceptStates().cardinality());
assertTrue(AutomatonTestUtil.sameLanguage(aOrCOrXStar, aOrCOrXStarSingleAcceptState));

int iters = atLeast(100);
for (int iter = 0; iter < iters; iter++) {
// sameLangage requires a deterministic automaton
Automaton expected =
Operations.determinize(AutomatonTestUtil.randomAutomaton(random()), Integer.MAX_VALUE);
Automaton actual = Operations.mergeAcceptStatesWithNoTransition(expected);
assertTrue(AutomatonTestUtil.sameLanguage(expected, actual));
}
}

public void testDuelRepeat() {
final int iters = atLeast(1_000);
for (int iter = 0; iter < iters; ++iter) {
Expand Down

0 comments on commit 3208920

Please sign in to comment.