Skip to content

Commit 6d7b988

Browse files
committed
levFSA optimizations
1 parent 3f6d5ce commit 6d7b988

File tree

8 files changed

+234
-124
lines changed

8 files changed

+234
-124
lines changed

src/commonMain/kotlin/ai/hypergraph/kaliningraph/automata/AFSA.kt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
package ai.hypergraph.kaliningraph.automata
22

33
import ai.hypergraph.kaliningraph.KBitSet
4+
import ai.hypergraph.kaliningraph.parsing.Bindex
45
import ai.hypergraph.kaliningraph.parsing.Σᐩ
6+
import ai.hypergraph.kaliningraph.types.Π3A
57
import kotlin.time.TimeSource
68

79
// Acyclic finite state automaton

src/commonMain/kotlin/ai/hypergraph/kaliningraph/automata/FSA.kt

Lines changed: 55 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -37,17 +37,65 @@ open class FSA constructor(open val Q: TSA, open val init: Set<Σᐩ>, open val
3737
val states: Set<Σᐩ> by lazy { Q.states() }
3838
open val stateLst: List<Σᐩ> by lazy { TODO() } //states.toList() }
3939

40-
fun allIndexedTxs1(unitProds: Set<Π2A<Σᐩ>>): List<Π3<Int, Σᐩ, Int>> {
40+
fun allIndexedTxs1(unitProds: Map<Σᐩ, List<Σᐩ>>): List<Π3<Int, Σᐩ, Int>> {
4141
val triples = mutableListOf<Π3<Int, Σᐩ, Int>>()
42-
for ((A, σ) in unitProds) for (arc in nominalForm.flattenedTriples)
42+
for ((A, σs) in unitProds) for (σ in σs) for (arc in nominalForm.flattenedTriples)
4343
if (arc.π2(σ)) triples.add(Triple(stateMap[arc.π1]!!, σ, stateMap[arc.π3]!!))
4444
return triples
4545
}
4646

47-
fun allIndexedTxs0(unitProds: Set<Π2A<Σᐩ>>, bindex: Bindex<Σᐩ>): List<Π3A<Int>> {
47+
// fun allIndexedTxs0(unitProds: Map<String, List<String>>, bindex: Bindex<String>): List<Π3A<Int>> {
48+
// // Local refs to avoid virtual lookups inside loops
49+
// val wild = nominalForm.wildArcs
50+
// val eq = nominalForm.eqArcs
51+
// val ne = nominalForm.neArcs
52+
// val neAll = nominalForm.allNeArcs
53+
//
54+
// val out = ArrayList<Π3A<Int>>(unitProds.size * (wild.size + neAll.size + 8))
55+
//
56+
// unitProds.forEach { (A, σs) ->
57+
// if (σs.isEmpty()) return@forEach
58+
// val Aint = bindex[A]
59+
//
60+
// // 1) Wildcards match iff sigmas non-empty
61+
// for (e in wild) out.add(Triple(e.from, Aint, e.to))
62+
//
63+
// when (σs.size) {
64+
// 1 -> {
65+
// val only = σs[0]
66+
// // 2a) Eq arcs: only for that literal
67+
// eq[only]?.forEach { e -> out.add(Triple(e.from, Aint, e.to)) }
68+
// // 2b) Ne arcs: all except those excluding 'only'
69+
// ne.forEach { (x, edges) ->
70+
// if (x != only) edges.forEach { e -> out.add(Triple(e.from, Aint, e.to)) }
71+
// }
72+
// }
73+
// else -> {
74+
// // |σs| >= 2
75+
// // 2a) Eq arcs: for each member (dedup keys cheaply if big)
76+
// val iter = if (σs.size > 8) σs.toHashSet() else σs
77+
// for (s in iter) eq[s]?.forEach { e -> out.add(Triple(e.from, Aint, e.to)) }
78+
// // 2b) Ne arcs: ALWAYS match when there are at least two distinct σ
79+
// for (e in neAll) out.add(Triple(e.from, Aint, e.to))
80+
// }
81+
// }
82+
// }
83+
// return out
84+
// }
85+
open fun allIndexedTxs0(unitProds: Map<Σᐩ, List<Σᐩ>>, bindex: Bindex<Σᐩ>): List<Π3A<Int>> {
4886
val triples = mutableListOf<Π3A<Int>>()
49-
for ((A, σ) in unitProds) for (arc in nominalForm.flattenedTriples)
50-
if (arc.π2(σ)) triples.add(Triple(stateMap[arc.π1]!!, bindex[A], stateMap[arc.π3]!!))
87+
for ((A, σs) in unitProds.entries) {
88+
val Aint = bindex[A]
89+
for (σ in σs) for (arc in nominalForm.flattenedTriples)
90+
if (arc.π2(σ))
91+
triples.add(
92+
Triple(
93+
stateMap[arc.π1]!!,
94+
Aint,
95+
stateMap[arc.π3]!!
96+
)
97+
)
98+
}
5199
return triples
52100
}
53101

@@ -141,7 +189,7 @@ open class FSA constructor(open val Q: TSA, open val init: Set<Σᐩ>, open val
141189
val bindex = cfg.bindex
142190
val width = cfg.nonterminals.size
143191
val vindex = cfg.vindex
144-
val ups = cfg.unitProductions
192+
val ups = cfg.grpUPs
145193
val aps: List<List<List<Int>?>> = levFSA.allPairs
146194
val dp = Array(levFSA.numStates) { Array(levFSA.numStates) { BooleanArray(width) { false } } }
147195

@@ -193,7 +241,7 @@ open class FSA constructor(open val Q: TSA, open val init: Set<Σᐩ>, open val
193241
val bimap = cfg.bimap
194242
val width = cfg.nonterminals.size
195243
val vindex = cfg.vindex
196-
val ups = cfg.unitProductions
244+
val ups = cfg.grpUPs
197245

198246
val nStates = levFSA.numStates
199247
val startIdx = bindex[START_SYMBOL]

src/commonMain/kotlin/ai/hypergraph/kaliningraph/automata/GRE.kt

Lines changed: 11 additions & 115 deletions
Original file line numberDiff line numberDiff line change
@@ -102,31 +102,6 @@ sealed class GRE(open vararg val args: GRE) {
102102
is CAT -> (l.dv(σ) * r).let { dl -> if (l.nullable) dl + r.dv(σ) else dl }
103103
}
104104

105-
// fun dv(σ: Int): GRE? = when (this) {
106-
// is EPS -> null // ∂_σ(ε) = ∅
107-
// is SET -> if (s[σ]) EPS() else null // ∂_σ({a}) = ε if σ = a, else ∅
108-
// is CUP -> {
109-
// val derivatives = args.mapNotNull { it.dv(σ) }
110-
// if (derivatives.isEmpty()) null
111-
// else derivatives.reduce { acc, next -> CUP(acc, next) }
112-
// }
113-
// is CAT -> {
114-
// val dl = l.dv(σ) // Left derivative
115-
// val leftPart = dl?.let { CAT(it, r) }
116-
// if (l.nullable) {
117-
// val dr = r.dv(σ) // Right derivative
118-
// when {
119-
// leftPart != null && dr != null -> CUP(leftPart, dr)
120-
// leftPart != null -> leftPart
121-
// dr != null -> dr
122-
// else -> null
123-
// }
124-
// } else {
125-
// leftPart
126-
// }
127-
// }
128-
// }
129-
130105
val nullable by lazy { isNullable() }
131106

132107
// Check whether 'g' accepts the empty string ε.
@@ -140,86 +115,6 @@ sealed class GRE(open vararg val args: GRE) {
140115
operator fun plus(g: GRE): GRE = CUP(this, g)
141116
operator fun times(g: GRE): GRE = CAT(this, g)
142117

143-
fun toDOTGraph(rankDir: String = "TB", font: String = "Helvetica", dedupLeaves: Boolean = true): String {
144-
fun Int.toUnicodeSubscript(): String = when (this) {
145-
0 -> "\u2080"
146-
1 -> "\u2081"
147-
2 -> "\u2082"
148-
3 -> "\u2083"
149-
4 -> "\u2084"
150-
5 -> "\u2085"
151-
6 -> "\u2086"
152-
7 -> "\u2087"
153-
8 -> "\u2088"
154-
9 -> "\u2089"
155-
else -> throw IllegalArgumentException("Input must be between 0 and 9")
156-
}
157-
158-
fun KBitSet.labelize(): String =
159-
(0 until n).mapNotNull { if (this[it]) "Σ${it.toUnicodeSubscript()}" else null }.joinToString(",", "{", "}")
160-
161-
data class Key(val kind: String, val payload: String)
162-
163-
val nodeId = mutableMapOf<Int, String>()
164-
val nodeDecl = StringBuilder()
165-
val edgeDecl = StringBuilder()
166-
var nextId = 0
167-
168-
fun newNodeId() = "n${nextId++}"
169-
170-
var i = 0
171-
fun declareNodeA(label: String, shape: String = "circle", style: String = ", style=\"rounded\"", extra: String = ""): String =
172-
nodeId.getOrPut(i++) {
173-
val id = newNodeId()
174-
nodeDecl.append(" $id [label=\"$label\", shape=$shape $style $extra];\n")
175-
id
176-
}
177-
178-
fun declareNodeB(key: Key, label: String, shape: String = "circle"): String =
179-
nodeId.getOrPut(i++) {
180-
val id = newNodeId()
181-
nodeDecl.append(" $id [label=\"$label\", shape=$shape, style=\"rounded\"];\n")
182-
id
183-
}
184-
185-
fun visit(g: GRE): String = when (g) {
186-
is EPS -> declareNodeB(Key("EPS", ""), "ε", "plaintext")
187-
188-
is SET -> declareNodeA(g.s.labelize(), "box", extra = ", width=0.5")
189-
190-
is CUP -> {
191-
if (!isLeafCup()) {
192-
val id = declareNodeB(Key("CUP${g.hash()}", ""), "")
193-
for (child in g.args) { edgeDecl.append(" $id -> ${visit(child)};\n") }
194-
id
195-
} else {
196-
val q = g.toSet() as SET
197-
val key = if (dedupLeaves) Key("SET", q.s.toString())
198-
else Key("SET${g.hashCode()}", "")
199-
declareNodeB(key, q.s.labelize(), "box")
200-
}
201-
}
202-
203-
is CAT -> {
204-
val id = declareNodeA("·", "invhouse", ",", "width=0.5")
205-
val lId = visit(g.l); edgeDecl.append(" $id -> $lId;\n")
206-
val rId = visit(g.r); edgeDecl.append(" $id -> $rId;\n")
207-
id
208-
}
209-
}
210-
211-
visit(this)
212-
213-
return buildString {
214-
appendLine("strict digraph GRE {")
215-
appendLine(" rankdir=$rankDir;")
216-
appendLine(" node [order=out];")
217-
append(nodeDecl)
218-
append(edgeDecl)
219-
appendLine("}")
220-
}
221-
}
222-
223118
fun flatunion(): GRE =
224119
if (this is CUP && args.all { it is CUP }) CUP(*args.flatMap { it.args.toList() }.toTypedArray())
225120
else this
@@ -286,7 +181,7 @@ fun repairWithGREAtDist(brokenStr: List<Σᐩ>, cfg: CFG, d: Int): Pair<GRE.CUP,
286181
val bindex = cfg.bindex
287182
val width = cfg.nonterminals.size
288183
val vindex = cfg.vindex
289-
val ups = cfg.unitProductions
184+
val ups = cfg.grpUPs
290185
val t2vs = cfg.tmToVidx
291186
val maxBranch = vindex.maxOf { it.size }
292187
val startIdx = bindex[START_SYMBOL]
@@ -353,7 +248,7 @@ fun repairWithGREAtDist(brokenStr: List<Σᐩ>, cfg: CFG, d: Int): Pair<GRE.CUP,
353248
.apply { s.set(tmm[σ]!!)/*; dq[p][q].set(Aidx)*/ }
354249

355250
var maxChildren = 0
356-
var location = -1 to -1
251+
// var location = -1 to -1
357252

358253
// 3) CYK + Floyd Warshall parsing
359254
for (dist in 1..<nStates) {
@@ -380,7 +275,7 @@ fun repairWithGREAtDist(brokenStr: List<Σᐩ>, cfg: CFG, d: Int): Pair<GRE.CUP,
380275
if (rhsPairs.isNotEmpty()) {
381276
if (list.size > maxChildren) {
382277
maxChildren = list.size
383-
location = p to q
278+
// location = p to q
384279
}
385280
dp[p][q][Aidx] = if (list.size == 1) list.first() else GRE.CUP(*list)
386281
}
@@ -403,7 +298,7 @@ fun repairWithGRE(brokenStr: List<Σᐩ>, cfg: CFG): GRE? {
403298
val bindex = cfg.bindex
404299
val width = cfg.nonterminals.size
405300
val vindex = cfg.vindex
406-
val ups = cfg.unitProductions
301+
val ups = cfg.grpUPs
407302
val t2vs = cfg.tmToVidx
408303
val maxBranch = vindex.maxOf { it.size }
409304
val startIdx = bindex[START_SYMBOL]
@@ -470,7 +365,7 @@ fun repairWithGRE(brokenStr: List<Σᐩ>, cfg: CFG): GRE? {
470365
.apply { s.set(tmm[σ]!!)/*; dq[p][q].set(Aidx)*/ }
471366

472367
var maxChildren = 0
473-
var location = -1 to -1
368+
// var location = -1 to -1
474369

475370
// 3) CYK + Floyd Warshall parsing
476371
for (dist in 1..<nStates) {
@@ -497,7 +392,7 @@ fun repairWithGRE(brokenStr: List<Σᐩ>, cfg: CFG): GRE? {
497392
if (rhsPairs.isNotEmpty()) {
498393
if (list.size > maxChildren) {
499394
maxChildren = list.size
500-
location = p to q
395+
// location = p to q
501396
}
502397
dp[p][q][Aidx] = if (list.size == 1) list.first() else GRE.CUP(*list)
503398
}
@@ -529,7 +424,7 @@ suspend fun initiateSuspendableRepair(brokenStr: List<Σᐩ>, cfg: CFG): GRE? {
529424
val bindex = cfg.bindex
530425
val width = cfg.nonterminals.size
531426
val vindex = cfg.vindex
532-
val ups = cfg.unitProductions
427+
val ups = cfg.grpUPs
533428
val t2vs = cfg.tmToVidx
534429
val maxBranch = vindex.maxOf { it.size }
535430
val startIdx = bindex[START_SYMBOL]
@@ -598,7 +493,7 @@ suspend fun initiateSuspendableRepair(brokenStr: List<Σᐩ>, cfg: CFG): GRE? {
598493
.apply { pause(); s.set(tmm[σ]!!)/*; dq[p][q].set(Aidx)*/ }
599494

600495
var maxChildren = 0
601-
var location = -1 to -1
496+
// var location = -1 to -1
602497

603498
// 3) CYK + Floyd Warshall parsing
604499
for (dist in 1 until nStates) {
@@ -626,7 +521,7 @@ suspend fun initiateSuspendableRepair(brokenStr: List<Σᐩ>, cfg: CFG): GRE? {
626521
if (rhsPairs.isNotEmpty()) {
627522
if (list.size > maxChildren) {
628523
maxChildren = list.size
629-
location = p to q
524+
// location = p to q
630525
}
631526
dp[p][q][Aidx] = if (list.size == 1) list.first() else GRE.CUP(*list)
632527
}
@@ -640,7 +535,8 @@ suspend fun initiateSuspendableRepair(brokenStr: List<Σᐩ>, cfg: CFG): GRE? {
640535
val allParses = levFSA.finalIdxs.mapNotNull { q -> dp[0][q][startIdx] }
641536

642537
println("Parsing took ${timer.elapsedNow()} with |σ|=${brokenStr.size}, " +
643-
"|Q|=$nStates, |G|=${cfg.size}, maxBranch=$maxBranch, |V|=$width, |Σ|=$tms, maxChildren=$maxChildren@$location")
538+
// "|Q|=$nStates, |G|=${cfg.size}, maxBranch=$maxBranch, |V|=$width, |Σ|=$tms, maxChildren=$maxChildren@$location")
539+
"|Q|=$nStates, |G|=${cfg.size}, maxBranch=$maxBranch, |V|=$width, |Σ|=$tms")
644540
// 5) Combine them under a single GRE
645541
return if (allParses.isEmpty()) null else GRE.CUP(*allParses.toTypedArray())
646542
}

src/commonMain/kotlin/ai/hypergraph/kaliningraph/automata/Nominal.kt

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,41 @@ class NOM(override val Q: TSA, override val init: Set<Σᐩ>, override val final
3232
mapF[it]?.filter { it.first(sym) }?.map { it.second } ?: emptyList()
3333
}.toSet()
3434
}.any { it in final }
35+
36+
// enum class PredKind { Wild, Eq, Ne }
37+
// data class IEdge(val from: Int, val to: Int)
38+
// data class ClassArc(val kind: PredKind, val sym: String?, val edge: IEdge)
39+
//
40+
// override val stateLst by lazy { states.groupBy { it.coords().let { (a, b) -> a + b } }.values.flatten() }
41+
//
42+
// val classifiedArcs: List<ClassArc> by lazy {
43+
// Q.map { (a, b, c) ->
44+
// val from = stateMap[a]!!
45+
// val to = stateMap[c]!!
46+
// val (kind, sym) = when {
47+
// b == "[.*]" -> PredKind.Wild to null
48+
// b.startsWith("[!=]") -> PredKind.Ne to b.drop(4)
49+
// else -> PredKind.Eq to b
50+
// }
51+
// ClassArc(kind, sym, IEdge(from, to))
52+
// }
53+
// }
54+
//
55+
// val wildArcs: List<IEdge> by lazy { classifiedArcs.asSequence().filter { it.kind == PredKind.Wild }.map { it.edge }.toList() }
56+
//
57+
// val eqArcs: Map<String, List<IEdge>> by lazy {
58+
// classifiedArcs.asSequence()
59+
// .filter { it.kind == PredKind.Eq }
60+
// .groupBy({ it.sym!! }, { it.edge })
61+
// }
62+
//
63+
// val neArcs: Map<String, List<IEdge>> by lazy {
64+
// classifiedArcs.asSequence()
65+
// .filter { it.kind == PredKind.Ne }
66+
// .groupBy({ it.sym!! }, { it.edge }) // arcs that match any σ != key
67+
// }
68+
//
69+
// val allNeArcs: List<IEdge> by lazy { neArcs.values.flatten() }
3570
}
3671

3772
fun FSA.nominalize() = NOM(Q, init, final)

src/commonMain/kotlin/ai/hypergraph/kaliningraph/parsing/CFG.kt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ val CFG.symbols: Set<Σᐩ> by cache { nonterminals + flatMap { it.RHS } }
4343
val CFG.terminals: Set<Σᐩ> by cache { symbols - nonterminals }
4444
val CFG.terminalUnitProductions: Set<Production> by cache { filter { it.RHS.size == 1 && it.RHS[0] !in nonterminals } }
4545
val CFG.unitProductions: Set<Pair<Σᐩ, Σᐩ>> by cache { filter { it.RHS.size == 1 }.map { it.LHS to it.RHS[0] }.toSet() }
46+
val CFG.grpUPs: Map<Σᐩ, List<Σᐩ>> by cache { unitProductions.groupBy({ it.first }, { it.second }) }
4647
val CFG.nonterminalProductions: Set<Production> by cache { filter { it !in terminalUnitProductions } }
4748
val CFG.unitNonterminals: Set<Σᐩ> by cache { terminalUnitProductions.map { it.LHS }.toSet() }
4849
val CFG.bimap: BiMap by cache { BiMap(this) }

src/commonMain/kotlin/ai/hypergraph/kaliningraph/parsing/Levenshtein.kt

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,33 @@ fun makeExactLevCFL(
7676
.also { println("Levenshtein-${str.size}x$radius automaton had ${Q.size} arcs!") }
7777
}
7878

79+
/** Uses nominal arc predicates. See [NOM] for denominalization. */
80+
fun makeLevFSA(
81+
str: List<Σᐩ>,
82+
maxRad: Int, // Maximum Levenshtein distance the automaton should accept
83+
digits: Int = (str.size * maxRad).toString().length,
84+
): FSA {
85+
val clock = TimeSource.Monotonic.markNow()
86+
var initSize = 0
87+
val fsa = (upArcs(str, maxRad, digits) +
88+
diagArcs(str, maxRad, digits) +
89+
str.mapIndexed { i, it -> rightArcs(i, maxRad, it, digits) }.flatten() +
90+
str.mapIndexed { i, it -> knightArcs(i, maxRad, it, digits, str) }.flatten())
91+
.also { initSize = it.size }
92+
.let { Q ->
93+
val initialStates = setOf("q_" + pd(0, digits).let { "$it/$it" })
94+
val finalStates =
95+
Q.states().filter { it.unpackCoordinates().let { (i, j) -> ((str.size - i + j).absoluteValue <= maxRad) } }
96+
97+
AFSA(Q, initialStates, finalStates)
98+
.also { it.height = maxRad; it.width = str.size; it.levString = str }
99+
// .nominalize()
100+
// .also { println("Reduced L-NFA(len=${str.size}, rad=$maxRad, states=${it.numStates}) " +
101+
// "from $initSize to ${Q.size} arcs in ${clock.elapsedNow()}") }
102+
}
103+
return fsa
104+
}
105+
79106
/** Uses nominal arc predicates. See [NOM] for denominalization. */
80107
fun makeLevFSA(
81108
str: List<Σᐩ>,

0 commit comments

Comments
 (0)