diff --git a/src/main/kotlin/org/srcgll/grammar/combinator/README.md b/src/main/kotlin/org/srcgll/grammar/combinator/README.md index 55e9ec592..baaf2025c 100644 --- a/src/main/kotlin/org/srcgll/grammar/combinator/README.md +++ b/src/main/kotlin/org/srcgll/grammar/combinator/README.md @@ -1,4 +1,4 @@ -# Grammar combinator +# Grammar combinator Kotlin DSL for describing context-free grammars. @@ -15,14 +15,14 @@ S = A* *DSL* ```kotlin class AStar : Grammar() { - var A = Term("a") - var S by NT() + var A = Term("a") + var S by NT() - init { - setStart(S) - S = Many(A) - } + init { + setStart(S) + S = Many(A) } +} ``` ### Non-terminals @@ -32,13 +32,13 @@ Non-terminals must be fields of the grammar class. Be sure to declare using dele Start non-terminal set with method `setStart(nt)`. Can be set once for grammar. -### Terminals +### Terminals `val A = Term("a")` `val B = Term(42)` -Terminal is a generic class. Can store terminals of any type. Terminals are compared based on their content. +Terminal is a generic class. Can store terminals of any type. Terminals are compared based on their content. They can be declared as fields of a grammar class or directly in productions. @@ -55,19 +55,19 @@ S3 = '{' S '}' S *DSL* ```kotlin class DyckGrammar : Grammar() { - var S by NT() - var S1 by NT() - var S2 by NT() - var S3 by NT() - - init { - setStart(S) - S = S1 or S2 or S3 or Epsilon - S1 = Term("(") * S * Term(")") * S - S2 = Term("[") * S * Term("]") * S - S3 = Term("{") * S * Term("}") * S - } + var S by NT() + var S1 by NT() + var S2 by NT() + var S3 by NT() + + init { + setStart(S) + S = S1 or S2 or S3 or Epsilon + S1 = Term("(") * S * Term(")") * S + S2 = Term("[") * S * Term("]") * S + S3 = Term("{") * S * Term("}") * S } +} ``` ### Production A → B = A = B @@ -76,24 +76,81 @@ A → B = A = B (.): Σ∗ × Σ∗ → Σ∗ a . 
b = a * b +```kotlin +class AB : Grammar() { + var S by NT() + init { + setStart(S) + S = Term("a") * Term("b") + } + } +``` ### Alternative a | b = a or b +```kotlin +class AStar : Grammar() { + var S by NT() + + init { + setStart(S) + S = Term("a") or S or Epsilon + } + } +``` + ### Kleene Star $a* = U_{i=0}^{\inf}a^i$ a* = Many(a) -`todo: a+ = some(a)` +```kotlin +class AStar : Grammar() { + var S by NT() + + init { + setStart(S) + S = Many(Term("a")) + } + } +``` + +### Some +$a+ = \bigcup_{i=1}^{\infty}a^i$ + +a+ = some(a) -### Optional +```kotlin +class AStar : Grammar() { + var S by NT() + + init { + setStart(S) + S = some(Term("a")) or Epsilon + } + } +``` + +### Optional a? -> a | Epsilon Epsilon -- constant terminal with behavior corresponding to the $\epsilon$ terminal (empty string). -`todo: a? = opt(a)` +a? = opt(a) + +```kotlin +class AStar : Grammar() { + var S by NT() + + init { + setStart(S) + S = opt(Term("a")) * S + } + } +``` -## RSM +## RSM DSL allows to get the RSM corresponding to the grammar using the `getRsm` method. The algorithm of RSM construction is based on Brzozowski derivations. 
+ diff --git a/src/main/kotlin/org/srcgll/grammar/combinator/regexp/Alternative.kt b/src/main/kotlin/org/srcgll/grammar/combinator/regexp/Alternative.kt index e2b23df8b..25ba4bdc3 100644 --- a/src/main/kotlin/org/srcgll/grammar/combinator/regexp/Alternative.kt +++ b/src/main/kotlin/org/srcgll/grammar/combinator/regexp/Alternative.kt @@ -2,16 +2,13 @@ package org.srcgll.grammar.combinator.regexp data class Alternative -( - internal val left : Regexp, - internal val right : Regexp, -) - : Regexp -{ + ( + internal val left: Regexp, + internal val right: Regexp, +) : Regexp { companion object { - fun makeAlternative(left : Regexp, right : Regexp) : Regexp - { - if (left is Empty) return right + fun makeAlternative(left: Regexp, right: Regexp): Regexp { + if (left is Empty) return right if (right is Empty) return left if (left is Alternative && (right == left.left || right == left.right)) { @@ -22,22 +19,22 @@ data class Alternative } return if (left == right) left else Alternative(left, right) } + + fun makeAlternative(literals: Iterable): Regexp { + val terms = literals.map { Term(it) } + val initial: Regexp = terms[0] or terms[1] + + return terms.subList(2, terms.size) + .fold(initial) { acc: Regexp, i: Term -> Alternative.makeAlternative(acc, i) } + } } - override fun derive(symbol : DerivedSymbol) : Regexp - { + override fun derive(symbol: DerivedSymbol): Regexp { return makeAlternative(left.derive(symbol), right.derive(symbol)) } } -infix fun Regexp.or(other : Regexp) : Regexp = Alternative.makeAlternative(left = this, other) - -fun makeAlternative(literals : Iterable) : Regexp -{ - val terms = literals.map { Term(it) } - val initial : Regexp = terms[0] or terms[1] +infix fun Regexp.or(other: Regexp): Regexp = Alternative.makeAlternative(left = this, other) - return terms.subList(2, terms.size) - .fold(initial) { acc : Regexp, i : Term -> Alternative.makeAlternative(acc, i) } -} \ No newline at end of file +fun opt(exp: Regexp): Regexp = 
Alternative.makeAlternative(exp, Epsilon) \ No newline at end of file diff --git a/src/main/kotlin/org/srcgll/grammar/combinator/regexp/Concatenation.kt b/src/main/kotlin/org/srcgll/grammar/combinator/regexp/Concatenation.kt index 49e2c67f5..23744b3f9 100644 --- a/src/main/kotlin/org/srcgll/grammar/combinator/regexp/Concatenation.kt +++ b/src/main/kotlin/org/srcgll/grammar/combinator/regexp/Concatenation.kt @@ -1,33 +1,38 @@ package org.srcgll.grammar.combinator.regexp data class Concat -( - internal val head : Regexp, - internal val tail : Regexp, -) - : Regexp -{ + ( + internal val head: Regexp, + internal val tail: Regexp, +) : Regexp { /* D[s](h.t) = acceptEps(h).D[s](t) | D[s](h).t */ - override fun derive(symbol : DerivedSymbol) : Regexp - { + override fun derive(symbol: DerivedSymbol): Regexp { val newHead = head.derive(symbol) if (!head.acceptEpsilon()) { return when (newHead) { - Empty -> Empty + Empty -> Empty Epsilon -> tail - else -> Concat(newHead, tail) + else -> Concat(newHead, tail) } } return when (newHead) { - Empty -> tail.derive(symbol) + Empty -> tail.derive(symbol) Epsilon -> Alternative.makeAlternative(tail, tail.derive(symbol)) - else -> Alternative.makeAlternative(Concat(newHead, tail), tail.derive(symbol)) + else -> Alternative.makeAlternative(Concat(newHead, tail), tail.derive(symbol)) } } } -infix operator fun Regexp.times(other : Regexp) : Concat = Concat(head = this, other) \ No newline at end of file +infix operator fun Regexp.times(other: Regexp): Concat = Concat(head = this, other) + +fun makeConcat(vararg literals: T): Regexp { + val terms = literals.map { Term(it) } + val initial: Regexp = Concat(terms[0], terms[1]) + + return terms.subList(2, terms.size) + .fold(initial) { acc: Regexp, i: Term -> Concat(acc, i) } +} \ No newline at end of file diff --git a/src/main/kotlin/org/srcgll/grammar/combinator/regexp/Many.kt b/src/main/kotlin/org/srcgll/grammar/combinator/regexp/Many.kt index 7780780e4..f88b2701b 100644 --- 
a/src/main/kotlin/org/srcgll/grammar/combinator/regexp/Many.kt +++ b/src/main/kotlin/org/srcgll/grammar/combinator/regexp/Many.kt @@ -8,9 +8,8 @@ data class Many { override fun derive(symbol : DerivedSymbol) : Regexp { - val newReg = exp.derive(symbol) - return when (newReg) { + return when (val newReg = exp.derive(symbol)) { Epsilon -> Many(exp) Empty -> Empty else -> Concat(newReg, Many(exp)) @@ -18,5 +17,4 @@ data class Many } } -val Regexp.many : Many - get() = Many(this) \ No newline at end of file +fun some(exp: Regexp) = (exp * Many(exp)) \ No newline at end of file diff --git a/src/main/kotlin/org/srcgll/grammar/combinator/regexp/NT.kt b/src/main/kotlin/org/srcgll/grammar/combinator/regexp/NT.kt index fa1d58fba..9cc620254 100644 --- a/src/main/kotlin/org/srcgll/grammar/combinator/regexp/NT.kt +++ b/src/main/kotlin/org/srcgll/grammar/combinator/regexp/NT.kt @@ -10,17 +10,19 @@ import java.util.* import kotlin.reflect.KProperty open class NT : DerivedSymbol { - private lateinit var nonTerm: Nonterminal - private lateinit var rsmDescription: Regexp + protected open lateinit var nonTerm: Nonterminal + protected lateinit var rsmDescription: Regexp - private fun getNewState(regex: Regexp): RSMState { - return RSMState(nonTerm, isStart = false, regex.acceptEpsilon()) + protected fun getNewState(regex: Regexp, isStart: Boolean = false): RSMState { + return RSMState(nonTerm, isStart, regex.acceptEpsilon()) } - fun buildRsmBox(): RSMState { + open fun buildRsmBox(): RSMState = buildRsmBox(nonTerm.startState) + + protected fun buildRsmBox(startState: RSMState): RSMState { val regexpToProcess = Stack() val regexpToRsmState = HashMap() - regexpToRsmState[rsmDescription] = nonTerm.startState + regexpToRsmState[rsmDescription] = startState val alphabet = rsmDescription.getAlphabet() @@ -53,7 +55,7 @@ open class NT : DerivedSymbol { } } } - return nonTerm.startState + return startState } override fun getNonterminal(): Nonterminal? 
{ @@ -73,4 +75,36 @@ open class NT : DerivedSymbol { } operator fun getValue(grammar: Grammar, property: KProperty<*>): Regexp = this + +} + +/** + * Helper class for building rsm delta when deleting/adding rules to the grammar. + * Uses existing grammar nonterminal + */ +class StandAloneNt(nonterminal: Nonterminal) : NT() { + init { + nonTerm = nonterminal + } + + /** + * Set description of Rsm, may be recursive + */ + fun setDescription(description: Regexp){ + rsmDescription = description + } + + /** + * Create new start state for RsmBox + * Otherwise the origin of the Rsm will be ruined. + */ + override fun buildRsmBox(): RSMState = buildRsmBox(getNewState(rsmDescription, true)) + + /** + * Build rsm from given description in regexp + */ + fun buildRsm(description: Regexp): RSMState{ + rsmDescription = description + return buildRsmBox() + } } \ No newline at end of file diff --git a/src/main/kotlin/org/srcgll/grammar/combinator/regexp/Regexp.kt b/src/main/kotlin/org/srcgll/grammar/combinator/regexp/Regexp.kt index 5da3a865f..29a6ddf37 100644 --- a/src/main/kotlin/org/srcgll/grammar/combinator/regexp/Regexp.kt +++ b/src/main/kotlin/org/srcgll/grammar/combinator/regexp/Regexp.kt @@ -3,52 +3,47 @@ package org.srcgll.grammar.combinator.regexp import org.srcgll.rsm.symbol.Nonterminal -sealed interface Regexp -{ +sealed interface Regexp { /* Based on Brzozowski derivative */ - fun derive(symbol : DerivedSymbol) : Regexp - fun getNonterminal() : Nonterminal? = null + fun derive(symbol: DerivedSymbol): Regexp + fun getNonterminal(): Nonterminal? 
= null /* Does the expression accept an epsilon */ - fun acceptEpsilon() : Boolean - { + fun acceptEpsilon(): Boolean { return when (this) { - is Empty -> false - is Epsilon -> true + is Empty -> false + is Epsilon -> true is DerivedSymbol -> false - is Concat -> head.acceptEpsilon() && tail.acceptEpsilon() - is Alternative -> left.acceptEpsilon() || right.acceptEpsilon() - is Many -> true + is Concat -> head.acceptEpsilon() && tail.acceptEpsilon() + is Alternative -> left.acceptEpsilon() || right.acceptEpsilon() + is Many -> true } } - fun getAlphabet() : Set - { + fun getAlphabet(): Set { return when (this) { - is Empty -> emptySet() - is Epsilon -> emptySet() + is Empty -> emptySet() + is Epsilon -> emptySet() is DerivedSymbol -> setOf(this) - is Concat -> head.getAlphabet() + tail.getAlphabet() - is Alternative -> left.getAlphabet() + right.getAlphabet() - is Many -> exp.getAlphabet() + is Concat -> head.getAlphabet() + tail.getAlphabet() + is Alternative -> left.getAlphabet() + right.getAlphabet() + is Many -> exp.getAlphabet() } } } -data object Epsilon : Regexp -{ +data object Epsilon : Regexp { override fun derive(symbol: DerivedSymbol): Regexp = Empty } /* Regular expression that does not accept any input string. 
*/ -data object Empty : Regexp -{ +data object Empty : Regexp { override fun derive(symbol: DerivedSymbol): Regexp = this } diff --git a/src/main/kotlin/org/srcgll/rsm/DynamicRsm.kt b/src/main/kotlin/org/srcgll/rsm/DynamicRsm.kt new file mode 100644 index 000000000..87fb6cb95 --- /dev/null +++ b/src/main/kotlin/org/srcgll/rsm/DynamicRsm.kt @@ -0,0 +1,328 @@ +package org.srcgll.rsm + +import org.srcgll.rsm.symbol.Nonterminal +import org.srcgll.rsm.symbol.Symbol +import org.srcgll.rsm.symbol.Terminal + +/** + * Modified implementation of the Incremental algorithm for dynamic rsm change: + * adding and removing line inputs + * [Incremental Construction and Maintenance of Minimal Finite-State Automata] + * (https://aclanthology.org/J02-2004) (Carrasco & Forcada, CL 2002) + */ +class DynamicRsm(private val origin: RSMState) { + data class CloneState(val origin: RSMState, val delta: RSMState) + + private val botDelta = RSMState(origin.nonterminal) + private val botOrigin = RSMState(origin.nonterminal) + private val cloneStatesToParents = hashMapOf() + private val register = mutableSetOf() + private lateinit var commonStart: RSMState + private var isRemoving = false + + /** + * Only linear input + */ + fun constructIncremental(delta: RSMState, isRemoving: Boolean) { + this.isRemoving = isRemoving + registerOriginStates() + commonStart = clone(origin, delta) + addDeltaStates() + val unreachable = unregisterUnreachable() + restoreUnreachableOrigins(unreachable) + mergeOrRegister() + removeDeadlocks() + } + + + /** + * Check queue and cloned states one by one from the end + */ + private fun mergeOrRegister() { + val used = hashSetOf() + + /** + * Redirect into newState those transitions coming into oldState + * States must be equivalent on output + */ + fun merge(oldState: RSMState, newState: RSMState) { + val incomingEdges = calculateIncomingEdges(commonStart) + val edges = incomingEdges[oldState] ?: emptySet() + for (edge in edges) { + edge.state.removeEdge(Edge(oldState, 
edge.symbol)) + edge.state.addEdge(edge.symbol, newState) + } + } + + fun mergeRecursive(state: RSMState) { + if (!used.contains(state)) { + used.add(state) + for (outEdge in state.getOutgoingEdges()) { + mergeRecursive(outEdge.state) + } + val equivState = register.find { + state.equivalent(it) + } + if (equivState != null) { + merge(state, equivState) + } else (register.add(state)) + } + } + mergeRecursive(commonStart) + } + + private enum class State { Deadlock, Final, Known } + + /** + * Remove deadlocks -- states with no outgoing edges that are not final + */ + private fun removeDeadlocks() { + val canGoToFinal = HashMap() + fun removeRecursive(state: RSMState) { + if (!canGoToFinal.contains(state)) { + if (state.isFinal) { + canGoToFinal[state] = State.Final + } else { + canGoToFinal[state] = State.Known + } + if (state.getOutgoingEdges().isEmpty()) { + canGoToFinal[state] = if (state.isFinal) State.Final else State.Deadlock + } + for (outEdge in state.getOutgoingEdges()) { + removeRecursive(outEdge.state) + when (canGoToFinal[outEdge.state]) { + State.Deadlock -> state.removeEdge(outEdge) + //cycle + State.Known -> {} + State.Final -> canGoToFinal[state] = State.Final + else -> throw IllegalArgumentException() + } + } + if (canGoToFinal[state] != State.Final) { + canGoToFinal[state] = State.Deadlock + } + } + } + removeRecursive(commonStart) + } + + + /** + * Find unreachable origin states and remove them from Register + */ + private fun unregisterUnreachable(): HashSet { + val incomingEdges = calculateIncomingEdges(commonStart) + val unreachable = origin.getAllStates().filter { + incomingEdges[it].isNullOrEmpty() + }.toHashSet() + register.removeAll(unreachable) + return unreachable + } + + /** + * Replace unreachable states from original Rsm as kotlin objects + * in equivalent place in result rsm + */ + private fun restoreUnreachableOrigins(unreachable: HashSet) { + /** + * Replace all incoming and outgoing transition from oldState to newState + * Remove 
all transition of oldState + */ + fun replace(oldState: RSMState, newState: RSMState) { + newState.getOutgoingEdges().forEach { edge -> + newState.removeEdge(edge) + } + calculateIncomingEdges(commonStart)[oldState]?.forEach { (state, symbol) -> + state.removeEdge(oldState, symbol) + state.addEdge(symbol, newState) + } + oldState.getOutgoingEdges().forEach { (state, symbol) -> + newState.addEdge(symbol, state) + } + } + + val used = mutableSetOf() + val queue = ArrayDeque() + var updatedStart: RSMState? = null + queue.add(commonStart) + + while (queue.isNotEmpty()) { + var state = queue.removeFirst() + if (!used.contains(state)) { + used.add(state) + val originState = cloneStatesToParents[state]!!.origin + if (unreachable.contains(originState)) { + unreachable.remove(originState) + replace(state, originState) + if (state == commonStart) { + updatedStart = originState + commonStart = originState + } + state = originState + } + for (edge in state.getOutgoingEdges()) { + queue.add(edge.state) + } + } + } + if (updatedStart == null) { + throw Exception("Start state should be updated!!") + } + } + + private fun clone(origin: RSMState, delta: RSMState): RSMState { + /** + * All outgoing transitions point to the corresponding intact states in , + * except for the transition with symbol a : xa ∈ Pr(w), + * which will points to the corresponding cloned state + */ + fun cloneOutgoingEdges(srcState: RSMState, destState: RSMState) { + val srcOutgoingEdges = srcState.getOutgoingEdges() + for (srcEdge in srcOutgoingEdges) { + destState.addEdge(srcEdge.symbol, srcEdge.state) + } + } + + fun isFinal(): Boolean { + if (isRemoving && delta.isFinal) { + return false + } + return origin.isFinal || delta.isFinal + } + + fun isStart(): Boolean = origin.isStart || delta.isStart + + val newState = RSMState(origin.nonterminal, isStart(), isFinal()) + cloneStatesToParents[newState] = CloneState(origin, delta) + cloneOutgoingEdges(origin, newState) + return newState + } + + private fun 
registerOriginStates() { + for (state in origin.getAllStates()) { + //modification: common states are states of original rsm + register.add(state) + cloneStatesToParents[state] = CloneState(state, botDelta) + } + } + + private fun addDeltaStates() { + /** + * If source rsm contains edge with deltaSymbol -- returns clone state + * in form (, ) + * Else (, ) + */ + fun cloneStep( + qLast: RSMState, + deltaSymbol: Symbol, + newDelta: RSMState, + origins: HashMap> + ): RSMState { + val newOrigin = origins[deltaSymbol]?.first() ?: botOrigin + val newState = clone(newOrigin, newDelta) + val destEdge = qLast.getOutgoingEdges().find { it.symbol == deltaSymbol } + if (destEdge != null) { + qLast.removeEdge(destEdge) + } + qLast.addEdge(deltaSymbol, newState) + return newState + } + + var qLast = commonStart + do { + val (originState, deltaState) = cloneStatesToParents[qLast]!! + val termEdges = deltaState.outgoingTerminalEdges.entries + val nonTermEdges = deltaState.outgoingNonterminalEdges.entries + for (t in termEdges) { + qLast = cloneStep(qLast, t.key, t.value.first(), originState.outgoingTerminalEdges) + } + for (nt in nonTermEdges) { + qLast = cloneStep(qLast, nt.key, nt.value.first(), originState.outgoingNonterminalEdges) + } + } while (termEdges.isNotEmpty() || nonTermEdges.isNotEmpty()) + } + + /** + * For each state get set of state which contains output edge to it + * and Symbol on this edge + */ + private fun calculateIncomingEdges(state: RSMState): HashMap> { + val used = hashSetOf() + val queue = ArrayDeque() + queue.add(state) + val incomingEdges = hashMapOf>() + while (queue.isNotEmpty()) { + val nextState = queue.removeFirst() + if (!used.contains(nextState)) { + used.add(nextState) + for (edge in nextState.getOutgoingEdges()) { + incomingEdges.getOrPut(edge.state) { hashSetOf() }.add(Edge(nextState, edge.symbol)) + queue.add(edge.state) + } + } + } + return incomingEdges + } +} + +data class Edge(val state: RSMState, val symbol: Symbol) + +fun 
RSMState.equivalent(other: RSMState): Boolean { + if (nonterminal != other.nonterminal) { + return false + } + if (isFinal != other.isFinal || isStart != other.isStart) { + return false + } + if (outgoingTerminalEdges != other.outgoingTerminalEdges) { + return false + } + return outgoingNonterminalEdges == other.outgoingNonterminalEdges +} + +fun RSMState.removeEdge(state: RSMState, symbol: Symbol) { + when (symbol) { + is Terminal<*> -> { + outgoingTerminalEdges[symbol]!!.remove(state) + if (outgoingTerminalEdges[symbol]!!.isEmpty()) { + outgoingTerminalEdges.remove(symbol) + } + } + + is Nonterminal -> { + outgoingNonterminalEdges[symbol]!!.remove(state) + if (outgoingNonterminalEdges[symbol]!!.isEmpty()) { + outgoingNonterminalEdges.remove(symbol) + } + } + else -> throw IllegalArgumentException("removing not implemented for Symbol implementation $symbol") + } +} + +fun RSMState.removeEdge(edge: Edge) = this.removeEdge(edge.state, edge.symbol) + +/** + * Get all states of RSM reachable from startState + */ +fun RSMState.getAllStates(): HashSet { + val states = hashSetOf() + val queue = ArrayDeque() + queue.add(this) + while (queue.isNotEmpty()) { + val state = queue.removeFirst() + if (!states.contains(state)) { + states.add(state) + for (edge in state.getOutgoingEdges()) { + queue.add(edge.state) + } + } + } + return states +} + +fun RSMState.getOutgoingEdges(): HashSet { + val states = hashSetOf() + outgoingNonterminalEdges.map { entry -> states.addAll(entry.value.map { Edge(it, entry.key) }) } + outgoingTerminalEdges.map { entry -> states.addAll(entry.value.map { Edge(it, entry.key) }) } + return states +} diff --git a/src/main/kotlin/org/srcgll/rsm/RSMState.kt b/src/main/kotlin/org/srcgll/rsm/RSMState.kt index ddcd569bf..12613bf49 100644 --- a/src/main/kotlin/org/srcgll/rsm/RSMState.kt +++ b/src/main/kotlin/org/srcgll/rsm/RSMState.kt @@ -1,6 +1,7 @@ package org.srcgll.rsm import org.srcgll.rsm.symbol.Nonterminal +import org.srcgll.rsm.symbol.Symbol import 
org.srcgll.rsm.symbol.Terminal class RSMState @@ -41,4 +42,21 @@ class RSMState outgoingNonterminalEdges[edge.nonterminal] = hashSetOf(edge.head) } } + + /** Adds an outgoing edge labelled [label] to [head] via addTerminalEdge or addNonterminalEdge, chosen by the label's type; throws IllegalArgumentException for any other Symbol. */ + fun addEdge(label: Symbol, head: RSMState){ + when (label){ + is Terminal<*> -> addTerminalEdge(RSMTerminalEdge(label, head)) + is Nonterminal -> addNonterminalEdge(RSMNonterminalEdge(label, head)) + else -> throw IllegalArgumentException("adding not implemented for Symbol implementation $label") + } + } + + fun add(delta: RSMState) { + DynamicRsm(this).constructIncremental(delta, false) + } + + fun remove(delta: RSMState) { + DynamicRsm(this).constructIncremental(delta, true) + } } diff --git a/src/main/kotlin/org/srcgll/rsm/RSMWrite.kt b/src/main/kotlin/org/srcgll/rsm/RSMWrite.kt index ba77521a2..b503e2221 100644 --- a/src/main/kotlin/org/srcgll/rsm/RSMWrite.kt +++ b/src/main/kotlin/org/srcgll/rsm/RSMWrite.kt @@ -7,8 +7,8 @@ fun writeRSMToTXT(startState: RSMState, pathToTXT: String) { var lastId = 0 val stateToId: HashMap = HashMap() - fun getId(state: RSMState) { - stateToId.getOrPut(state) { lastId++ } + fun getId(state: RSMState): Int { + return stateToId.getOrPut(state) { lastId++ } } val states: ArrayList = ArrayList() @@ -100,8 +100,8 @@ fun writeRSMToDOT(startState: RSMState, pathToTXT: String) { var lastId = 0 val stateToId: HashMap = HashMap() - fun getId(state: RSMState) { - stateToId.getOrPut(state) { lastId++ } + fun getId(state: RSMState): Int { + return stateToId.getOrPut(state) { lastId++ } } val states: HashSet = HashSet() diff --git a/src/main/kotlin/org/srcgll/rsm/symbol/Symbol.kt b/src/main/kotlin/org/srcgll/rsm/symbol/Symbol.kt index a34756f89..1815497c2 100644 --- a/src/main/kotlin/org/srcgll/rsm/symbol/Symbol.kt +++ b/src/main/kotlin/org/srcgll/rsm/symbol/Symbol.kt @@ -1,3 +1,3 @@ package org.srcgll.rsm.symbol -interface Symbol \ No newline at end of file +sealed interface Symbol \ No newline at end of file diff --git a/src/test/kotlin/rsm/RsmTest.kt b/src/test/kotlin/rsm/RsmTest.kt index 
178719c77..ff619d4c7 100644 --- a/src/test/kotlin/rsm/RsmTest.kt +++ b/src/test/kotlin/rsm/RsmTest.kt @@ -1,61 +1,109 @@ package rsm import org.junit.jupiter.api.Test -import org.srcgll.rsm.RSMNonterminalEdge -import org.srcgll.rsm.RSMState -import org.srcgll.rsm.RSMTerminalEdge +import org.srcgll.rsm.* import org.srcgll.rsm.symbol.Nonterminal import org.srcgll.rsm.symbol.Terminal +import kotlin.test.assertEquals import kotlin.test.assertFalse import kotlin.test.assertTrue interface RsmTest { + fun isDebug() = false + fun writeDotInDebug(startState: RSMState, rsmName: String) { + if (isDebug()) { + writeRSMToDOT(startState, "inc/$rsmName.dot") + writeRSMToTXT(startState, "inc/$rsmName.txt") + } + } + /** * Compare two RSM, two state are equal if they have same name - * */ fun equalsByNtName(expected: RSMState, actual: RSMState): Boolean { + return equalsByNtName(expected, actual, hashMapOf()) + } + + private fun equalsByNtName(expected: RSMState, actual: RSMState, equals: HashMap): Boolean { + if (equals[expected] != null) { + return equals[expected] === actual + } if (actual.nonterminal.name == null) { throw IllegalArgumentException("For comparing by name non terminal must have unique not null name") } - if (expected.nonterminal.name != actual.nonterminal.name - || expected.isStart != actual.isStart || expected.isFinal != actual.isFinal) { + if (expected.nonterminal.name != actual.nonterminal.name || expected.isStart != actual.isStart || expected.isFinal != actual.isFinal) { return false } - if (actual.outgoingTerminalEdges.size != expected.outgoingTerminalEdges.size - || actual.outgoingNonterminalEdges.size != expected.outgoingNonterminalEdges.size) { + equals[expected] = actual + if (actual.outgoingTerminalEdges.size != expected.outgoingTerminalEdges.size || actual.outgoingNonterminalEdges.size != expected.outgoingNonterminalEdges.size) { return false } for (tEdge in expected.outgoingTerminalEdges) { val states = actual.outgoingTerminalEdges[tEdge.key] ?: return 
false - if (!equalsAsSetByName(tEdge.value, states)) { + if (!equalsAsSetByName(tEdge.value, states, equals)) { return false } } for (ntEdge in expected.outgoingNonterminalEdges) { val states = actual.outgoingNonterminalEdges.entries.firstOrNull { it.key.name == ntEdge.key.name } ?: return false - if (!equalsAsSetByName(ntEdge.value, states.value)) { + if (!equalsAsSetByName(ntEdge.value, states.value, equals)) { return false } } + equals[expected] = actual return true } - private fun equalsAsSetByName(expected: HashSet, actual: HashSet): Boolean { + private fun equalsAsSetByName( + expected: HashSet, actual: HashSet, equals: HashMap + ): Boolean { if (expected.size != actual.size) { return false } for (state in expected) { val curState = actual.firstOrNull { it.nonterminal.name == state.nonterminal.name } - if (curState == null || !equalsByNtName(state, curState)) { + if (curState == null || !equalsByNtName(state, curState, equals)) { return false } } return true } - fun getAStar(stateName: String): RSMState { + fun testIncremental( + origin: RSMState, + delta: RSMState, + expected: RSMState, + expectedCommonStates: Int, + isRemoving: Boolean = false + ) { + writeDotInDebug(delta, "delta") + writeDotInDebug(expected, "expected") + writeDotInDebug(origin, "origin") + val originStates = origin.getAllStates() + if (isRemoving) { + origin.remove(delta) + } else { + origin.add(delta) + } + writeDotInDebug(origin, "actual") + assertTrue { equalsByNtName(expected, origin) } + assertEquals(expectedCommonStates, originStates.intersect(origin.getAllStates()).size) + } + + + @Test + fun testEquals() { + assertTrue { equalsByNtName(getAStarRSM("S"), getAStarRSM("S")) } + assertFalse { equalsByNtName(getAStarRSM("S"), getAStarRSM("K")) } + } + + @Test + fun debugTest(){ + assertFalse(isDebug(), "\"Debug\" flag must be set to false before committing.") + } + + fun getAStarRSM(stateName: String): RSMState { val s = Nonterminal(stateName) val a = Terminal("a") val st0 = 
RSMState(s, isStart = true) @@ -70,9 +118,4 @@ interface RsmTest { return s.startState } - @Test - fun testEquals() { - assertTrue { equalsByNtName(getAStar("S"), getAStar("S")) } - assertFalse { equalsByNtName(getAStar("S"), getAStar("K")) } - } } \ No newline at end of file diff --git a/src/test/kotlin/rsm/api/LinearDynamicDyckTest.kt b/src/test/kotlin/rsm/api/LinearDynamicDyckTest.kt new file mode 100644 index 000000000..96c149100 --- /dev/null +++ b/src/test/kotlin/rsm/api/LinearDynamicDyckTest.kt @@ -0,0 +1,123 @@ +package rsm.api + +import org.junit.jupiter.api.Test +import org.srcgll.grammar.combinator.Grammar +import org.srcgll.grammar.combinator.regexp.* +import org.srcgll.rsm.RSMState +import org.srcgll.rsm.symbol.Nonterminal +import rsm.RsmTest + +/** + * Compare incremental union of Grammar Rsm and linear Delta + * Nonterminals in Delta must be the same as in Origin Rsm! + */ +class LinearDynamicStarDyckTest : RsmTest { + + @Test + fun `test Dyck addition`() { + val origin = DyckLanguage().getRsm() + val delta = getDyckDelta(origin.nonterminal, "[", "]") + testIncremental(origin, delta, Dyck2().getRsm(), 4) + } + + @Test + fun `test removing brace from Dyck language`() { + val origin = Dyck2().getRsm() + val delta = getDyckDelta(origin.nonterminal, "[", "]") + testIncremental(origin, delta, DyckLanguage().getRsm(), 4, true) + } + + @Test + fun `test ExtDyck2 removing`() { + val origin = ExtDyck2().getRsm() + val delta = getExtDyckDelta(origin.nonterminal, "[", "]") + testIncremental(origin, delta, ExtDyck1().getRsm(), 5, true) + } + + @Test + fun `test ExtDyck2 addition`() { + val origin = ExtDyck2().getRsm() + val delta = getExtDyckDelta(origin.nonterminal, "{", "}") + testIncremental(origin, delta, ExtDyck3().getRsm(), 7) + } + + + /** + * Rsm for <'openBrace' nonTerm 'closeBrace'> + */ + private fun getDyckDelta(nonTerm: Nonterminal, openBrace: String, closeBrace: String): RSMState { + val nt = StandAloneNt(nonTerm) + return 
nt.buildRsm(Term(openBrace) * nt * Term(closeBrace)) + } + + /** + * Rsm for <'openBrace' nonTerm 'closeBrace' nonTerm> + */ + private fun getExtDyckDelta(nonTerm: Nonterminal, openBrace: String, closeBrace: String): RSMState { + val nt = StandAloneNt(nonTerm) + return nt.buildRsm(Term(openBrace) * nt * Term(closeBrace) * nt) + } + + /** + * Grammar for language S = '(' S ')' | '[[' S ']]' + */ + private class Dyck2 : Grammar() { + var S by NT() + + init { + setStart(S) + S = Term("[") * S * Term("]") or Term("(") * S * Term(")") + } + } + + /** + * Grammar for language S = eps | '(' S ')' S + */ + private class ExtDyck1 : Grammar() { + var S by NT() + + init { + setStart(S) + S = Epsilon or Term("(") * S * Term(")") * S + } + } + + /** + * Grammar for language S = eps | '(' S ')' S | '[[' S ']]' S + */ + private class ExtDyck2 : Grammar() { + var S by NT() + + init { + setStart(S) + S = Epsilon or Term("[") * S * Term("]") * S or Term("(") * S * Term(")") * S + } + } + + /** + * Grammar for language S = ( S ) + */ + class DyckLanguage : Grammar() { + var S by NT() + + init { + setStart(S) + S = Term("(") * S * Term(")") + } + } + + /** + * Grammar for language S = eps | '(' S ')' S | '[[' S ']]' S + */ + private class ExtDyck3 : Grammar() { + var S by NT() + + init { + setStart(S) + S = Epsilon or Term("[") * S * Term("]") * S or ( + Term("(") * S * Term(")") * S) or ( + Term("{") * S * Term("}") * S + ) + } + } +} diff --git a/src/test/kotlin/rsm/api/LinearDynamicSimpleTest.kt b/src/test/kotlin/rsm/api/LinearDynamicSimpleTest.kt new file mode 100644 index 000000000..fc8c1fffe --- /dev/null +++ b/src/test/kotlin/rsm/api/LinearDynamicSimpleTest.kt @@ -0,0 +1,113 @@ +package rsm.api + +import org.junit.jupiter.api.Test +import org.srcgll.grammar.combinator.Grammar +import org.srcgll.grammar.combinator.regexp.* +import org.srcgll.rsm.RSMState +import org.srcgll.rsm.symbol.Nonterminal +import rsm.RsmTest + +/** + * Compare incremental union of Grammar Rsm and linear 
Delta + * Nonterminals in Delta must be the same as in Origin Rsm! + */ +class LinearDynamicSimpleTest : RsmTest { + + @Test + fun `test union {(ba)+} with {bra}`() { + /** + * Grammar for language S = (ba)+ + */ + class BaPlus : Grammar() { + var S by NT() + + init { + setStart(S) + S = some(makeConcat("b", "a")) + } + } + + val origin = BaPlus().getRsm() + val s = origin.nonterminal + testIncremental(origin, getBra(s), BaPlusOrBra().getRsm(), 3) + } + + @Test + fun `test union {(ba)+, bra} with {bar}`() { + val origin = BaPlusOrBra().getRsm() + val s = origin.nonterminal + testIncremental(origin, getBar(s), BaPlusOrBarOrBra().getRsm(), 6) + } + + @Test + fun `test removing {baba} from {(ba)+, bar, bra}`() { + /** + * Grammar for language {(ba)+, bar, bra} \ {baba} + */ + class Expected : Grammar() { + var S by NT() + + init { + setStart(S) + S = makeConcat("b", "a") or ( + makeConcat("b", "a", "r") or ( + makeConcat("b", "r", "a") or ( + makeConcat("b", "a", "b", "a") * some(makeConcat("b", "a")) + ))) + } + } + + fun getBaba(nonTerm: Nonterminal): RSMState { + val s = StandAloneNt(nonTerm) + return s.buildRsm(makeConcat("b", "a", "b", "a")) + } + + val origin = BaPlusOrBarOrBra().getRsm() + val s = origin.nonterminal + testIncremental(origin, getBaba(s), Expected().getRsm(), 7, true) + + } + + /** + * Single-string automaton accepting string "bra" + */ + private fun getBra(nonTerm: Nonterminal): RSMState { + val s = StandAloneNt(nonTerm) + return s.buildRsm(makeConcat("b", "r", "a")) + } + + /** + * Single-string automaton accepting string "bar" + */ + private fun getBar(nonTerm: Nonterminal): RSMState { + val s = StandAloneNt(nonTerm) + return s.buildRsm(makeConcat("b", "a", "r")) + } + + /** + * Grammar for language {(ba)+, bar, bra} + */ + private class BaPlusOrBarOrBra : Grammar() { + var S by NT() + + init { + setStart(S) + S = some(makeConcat("b", "a")) or ( + makeConcat("b", "a", "r") or ( + makeConcat("b", "r", "a"))) + } + } + + /** + * Minimal 
automaton accepting the language {(ba)+, bra} + */ + private class BaPlusOrBra : Grammar() { + var S by NT() + + init { + setStart(S) + S = some(makeConcat("b", "a")) or + makeConcat("b", "r", "a") + } + } +} \ No newline at end of file diff --git a/src/test/kotlin/rsm/api/LinearDynamicStarDyckTest.kt b/src/test/kotlin/rsm/api/LinearDynamicStarDyckTest.kt new file mode 100644 index 000000000..8c9a2ed68 --- /dev/null +++ b/src/test/kotlin/rsm/api/LinearDynamicStarDyckTest.kt @@ -0,0 +1,60 @@ +package rsm.api + +import org.junit.jupiter.api.Test +import org.srcgll.grammar.combinator.Grammar +import org.srcgll.grammar.combinator.regexp.* +import org.srcgll.rsm.RSMState +import org.srcgll.rsm.symbol.Nonterminal +import rsm.RsmTest +import kotlin.test.Ignore + +/** + * Compare incremental union of Grammar Rsm and linear Delta + * Nonterminals in Delta must be the same as in Origin Rsm! + */ +class LinearDynamicDyckTest : RsmTest { + + @Test + @Ignore("not implemented yet: not linear input") + fun `test DyckStar addition`() { + val origin = DyckStar1().getRsm() + val delta = getStarDyckDelta(origin.nonterminal, "[", "]") + testIncremental(origin, delta, DyckStar2().getRsm(), 3) + } + + /** + * Rsm for <('openBrace' nonTerm 'closeBrace')*> + */ + private fun getStarDyckDelta(nonTerm: Nonterminal, openBrace: String, closeBrace: String): RSMState { + val nt = StandAloneNt(nonTerm) + return nt.buildRsm(Many(Term(openBrace) * nt * Term(closeBrace))) + } + + /** + * Grammar for language S = ( '(' S ')' )* + */ + private class DyckStar1 : Grammar() { + var S by NT() + + init { + setStart(S) + S = Many(Term("(") * S * Term(")")) + } + } + + /** + * Grammar for language S = ( '(' S ')' | '[[' S ']]' )* + */ + private class DyckStar2 : Grammar() { + var S by NT() + + init { + setStart(S) + S = Many( + Term("(") * S * Term(")") or ( + Term("[") * S * Term("]")) + ) + } + } + +} \ No newline at end of file diff --git a/src/test/kotlin/rsm/api/TerminalsEqualsTest.kt
b/src/test/kotlin/rsm/api/TerminalsEqualsTest.kt index 502f870f6..1d47eea49 100644 --- a/src/test/kotlin/rsm/api/TerminalsEqualsTest.kt +++ b/src/test/kotlin/rsm/api/TerminalsEqualsTest.kt @@ -6,8 +6,9 @@ import org.srcgll.grammar.combinator.regexp.NT import org.srcgll.grammar.combinator.regexp.Term import org.srcgll.grammar.combinator.regexp.or import org.srcgll.grammar.combinator.regexp.times +import org.srcgll.rsm.writeRSMToDOT import rsm.RsmTest -import kotlin.test.assertTrue +import kotlin.test.assertFalse class TerminalsEqualsTest : RsmTest { class AStarTerms : Grammar() { @@ -21,16 +22,19 @@ class TerminalsEqualsTest : RsmTest { class AStar : Grammar() { var S by NT() - val A = Term("a") + var A by NT() init { setStart(S) S = A or A * S or S * S + A = Term("a") + } } @Test fun testRsm() { - assertTrue { equalsByNtName(AStar().getRsm(), AStarTerms().getRsm()) } + writeRSMToDOT(AStar().getRsm(), "actual.dot") + assertFalse { equalsByNtName(AStar().getRsm(), AStarTerms().getRsm()) } } } \ No newline at end of file diff --git a/src/test/kotlin/rsm/builder/AStarTest.kt b/src/test/kotlin/rsm/builder/AStarTest.kt index a972f82f5..70684595f 100644 --- a/src/test/kotlin/rsm/builder/AStarTest.kt +++ b/src/test/kotlin/rsm/builder/AStarTest.kt @@ -25,6 +25,6 @@ class AStarTest : RsmTest { fun testRsm() { val aStar = AStar() assertNotNull(aStar.S.getNonterminal()) - assertTrue { equalsByNtName(getAStar("S"), aStar.getRsm()) } + assertTrue { equalsByNtName(getAStarRSM("S"), aStar.getRsm()) } } } \ No newline at end of file diff --git a/src/test/kotlin/rsm/builder/OptionalTest.kt b/src/test/kotlin/rsm/builder/OptionalTest.kt new file mode 100644 index 000000000..1e2fa2b35 --- /dev/null +++ b/src/test/kotlin/rsm/builder/OptionalTest.kt @@ -0,0 +1,49 @@ +package rsm.builder + +import org.junit.jupiter.api.Test +import org.srcgll.grammar.combinator.Grammar +import org.srcgll.grammar.combinator.regexp.NT +import org.srcgll.grammar.combinator.regexp.Term +import 
org.srcgll.grammar.combinator.regexp.opt +import org.srcgll.grammar.combinator.regexp.times +import org.srcgll.rsm.RSMNonterminalEdge +import org.srcgll.rsm.RSMState +import org.srcgll.rsm.RSMTerminalEdge +import org.srcgll.rsm.symbol.Nonterminal +import org.srcgll.rsm.symbol.Terminal +import org.srcgll.rsm.writeRSMToTXT +import rsm.RsmTest +import kotlin.test.assertNotNull +import kotlin.test.assertTrue + +class OptionalTest : RsmTest { + class AStar : Grammar() { + var S by NT() + + init { + setStart(S) + S = opt(Term("a")) * S + } + } + override fun getAStarRSM(stateName: String): RSMState { + val s = Nonterminal(stateName) + val a = Terminal("a") + val st0 = RSMState(s, isStart = true) + s.startState = st0 + val st1 = RSMState(s) + val st2 = RSMState(s, isFinal = true) + st0.addTerminalEdge(RSMTerminalEdge(a, st1)) + st1.addNonterminalEdge(RSMNonterminalEdge(s, st2)) + st0.addNonterminalEdge(RSMNonterminalEdge(s, st2)) + return s.startState + } + + @Test + fun testRsm() { + val aStar = AStar() + assertNotNull(aStar.S.getNonterminal()) + writeRSMToTXT(aStar.getRsm(), "actual.txt") + writeRSMToTXT(getAStarRSM("S"), "expected.txt") + assertTrue { equalsByNtName(getAStarRSM("S"), aStar.getRsm()) } + } +} \ No newline at end of file diff --git a/src/test/kotlin/rsm/builder/StandAloneBuilderTest.kt b/src/test/kotlin/rsm/builder/StandAloneBuilderTest.kt new file mode 100644 index 000000000..064c55996 --- /dev/null +++ b/src/test/kotlin/rsm/builder/StandAloneBuilderTest.kt @@ -0,0 +1,84 @@ +package rsm.builder + +import org.srcgll.grammar.combinator.regexp.StandAloneNt +import org.srcgll.grammar.combinator.regexp.Term +import org.srcgll.grammar.combinator.regexp.makeConcat +import org.srcgll.grammar.combinator.regexp.times +import org.srcgll.rsm.RSMState +import org.srcgll.rsm.symbol.Nonterminal +import org.srcgll.rsm.symbol.Terminal +import rsm.RsmTest +import kotlin.test.Test + +class StandAloneBuilderTest : RsmTest { + @Test + fun testDyckDelta() { + fun 
getExpected(nonTerm: Nonterminal): RSMState { + val deltaStart = RSMState(nonTerm, isStart = true) + val st1 = RSMState(nonTerm) + val st2 = RSMState(nonTerm) + val st3 = RSMState(nonTerm) + val st4 = RSMState(nonTerm, isFinal = true) + deltaStart.addEdge(Terminal("["), st1) + st1.addEdge(nonTerm, st2) + st2.addEdge(Terminal("]"), st3) + st3.addEdge(nonTerm, st4) + return deltaStart + } + + fun getActual(nonTerm: Nonterminal): RSMState { + val s = StandAloneNt(nonTerm) + s.setDescription(Term("[") * s * Term("]") * s) + return s.buildRsmBox() + } + + val nonTerm = Nonterminal("S") + kotlin.test.assertTrue("StandAloneNt box for '[' S ']' S differs from the expected RSM") { equalsByNtName(getExpected(nonTerm), getActual(nonTerm)) } + } + + @Test + fun testBabaDelta() { + fun getExpectedBaba(nonTerm: Nonterminal): RSMState { + val st0 = RSMState(nonTerm, isStart = true) + val st1 = RSMState(nonTerm) + val st2 = RSMState(nonTerm) + val st3 = RSMState(nonTerm) + val st4 = RSMState(nonTerm, isFinal = true) + st0.addEdge(Terminal("b"), st1) + st1.addEdge(Terminal("a"), st2) + st2.addEdge(Terminal("b"), st3) + st3.addEdge(Terminal("a"), st4) + return st0 + } + + fun getActualBaba(nonTerm: Nonterminal): RSMState { + val s = StandAloneNt(nonTerm) + return s.buildRsm(makeConcat("b", "a", "b", "a")) + } + + val nonTerm = Nonterminal("S") + kotlin.test.assertTrue("StandAloneNt RSM for baba differs from the expected RSM") { equalsByNtName(getExpectedBaba(nonTerm), getActualBaba(nonTerm)) } + } + + @Test + fun testBra(){ + fun getExpectedBra(nonTerm: Nonterminal): RSMState { + val st0 = RSMState(nonTerm, isStart = true) + val st1 = RSMState(nonTerm) + val st2 = RSMState(nonTerm) + val st3 = RSMState(nonTerm, isFinal = true) + st0.addEdge(Terminal("b"), st1) + st1.addEdge(Terminal("r"), st2) + st2.addEdge(Terminal("a"), st3) + return st0 + } + + fun getActualBra(nonTerm: Nonterminal): RSMState { + val s = StandAloneNt(nonTerm) + return s.buildRsm(makeConcat("b", "r", "a")) + } + val nonTerm = Nonterminal("S") + kotlin.test.assertTrue("StandAloneNt RSM for bra differs from the expected RSM") { equalsByNtName(getExpectedBra(nonTerm), getActualBra(nonTerm)) } + } +} +