Skip to content

Commit 0e7ca51

Browse files
committed
extract common functionality
1 parent 80df8f6 commit 0e7ca51

File tree

3 files changed

+140
-11
lines changed

3 files changed

+140
-11
lines changed

build.gradle.kts

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,8 @@ kotlin {
8686
implementation(kotlin("reflect:2.1.0"))
8787

8888
implementation("com.ionspin.kotlin:bignum:0.3.10")
89+
90+
api("org.jetbrains.kotlinx:kotlinx-coroutines-core:1.10.2")
8991
}
9092
}
9193

@@ -99,9 +101,6 @@ kotlin {
99101
implementation("guru.nidi:graphviz-kotlin:0.18.1")
100102
implementation("org.graalvm.js:js:24.2.1")
101103

102-
// Markovian deps
103-
implementation("org.jetbrains.kotlinx:kotlinx-coroutines-core:1.10.2")
104-
105104
implementation("org.jetbrains.lets-plot:platf-awt-jvm:4.4.1")
106105
implementation("org.jetbrains.lets-plot:lets-plot-kotlin-jvm:4.10.0")
107106

src/commonMain/kotlin/ai/hypergraph/kaliningraph/automata/GRE.kt

Lines changed: 136 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,12 @@ package ai.hypergraph.kaliningraph.automata
22

33
import ai.hypergraph.kaliningraph.KBitSet
44
import ai.hypergraph.kaliningraph.parsing.*
5-
import ai.hypergraph.kaliningraph.repair.LED_BUFFER
6-
import ai.hypergraph.kaliningraph.repair.MAX_RADIUS
7-
import ai.hypergraph.kaliningraph.sampling.randomString
5+
import ai.hypergraph.kaliningraph.repair.*
86
import ai.hypergraph.kaliningraph.tensor.UTMatrix
97
import ai.hypergraph.kaliningraph.types.*
10-
import kotlin.collections.plus
11-
import kotlin.math.absoluteValue
12-
import kotlin.math.max
8+
import kotlinx.coroutines.delay
9+
import kotlin.math.*
10+
import kotlin.time.Duration.Companion.nanoseconds
1311
import kotlin.time.TimeSource
1412

1513
// Generalized regular expression: https://planetmath.org/generalizedregularexpression
@@ -374,7 +372,138 @@ fun repairWithGRE(brokenStr: List<Σᐩ>, cfg: CFG): GRE? {
374372
// 4) Gather final parse trees from dp[0][f][startIdx], for all final states f
375373
val allParses = levFSA.finalIdxs.mapNotNull { q -> dp[0][q][startIdx] }
376374

377-
val clock = TimeSource.Monotonic.markNow()
378375
// 5) Combine under a single GRE
379376
return if (allParses.isEmpty()) null else GRE.CUP(*allParses.toTypedArray())
377+
}
378+
379+
/**
 * Runs [repairWithGRE] on [brokenStr] under [cfg] and enumerates the words of the resulting
 * expression over `cfg.tmLst`.
 *
 * @return the words of the repair expression, or an empty sequence when [repairWithGRE] yields `null`.
 */
fun initiateSerialRepair(brokenStr: List<Σᐩ>, cfg: CFG): Sequence<Σᐩ> {
  // No repair expression could be built: nothing to enumerate.
  val repairs = repairWithGRE(brokenStr, cfg) ?: return emptySequence()
  return repairs.words(cfg.tmLst)
}
381+
382+
// Same as serial repair, but with strategic pauses to prevent stuttering on single-threaded runtimes
383+
/**
 * Builds a [GRE] describing the repairs of [brokenStr] under [cfg], periodically suspending so that
 * other coroutines sharing the thread get a chance to run.
 *
 * The work proceeds in two phases:
 *  1. Probe Levenshtein automata of radius `3 ..< MAX_RADIUS * 3` with a boolean recognizer until one
 *     accepts; the reported radius (or the upper bound if none accepts) plus [LED_BUFFER] becomes the
 *     working radius.
 *  2. Fill a `states x states x nonterminals` table of [GRE] fragments over the automaton of that
 *     radius and union the entries found for the start symbol at each final state.
 *
 * Progress and timing information is written to standard output via `println`.
 *
 * @param brokenStr the token sequence to repair.
 * @param cfg the grammar whose indices (`bindex`, `vindex`, `tmToVidx`, ...) drive the tables.
 * @return the union of all start-symbol entries at final states, or `null` if there are none.
 */
suspend fun initiateSuspendableRepair(brokenStr: List<Σᐩ>, cfg: CFG): GRE? {
  var i = 0
  val upperBound = MAX_RADIUS * 3
  // val monoEditBounds = cfg.maxParsableFragmentB(brokenStr, pad = upperBound)
  val timer = TimeSource.Monotonic.markNow()
  val bindex = cfg.bindex
  val width = cfg.nonterminals.size
  val vindex = cfg.vindex
  val ups = cfg.unitProductions
  val t2vs = cfg.tmToVidx
  val maxBranch = vindex.maxOf { it.size }
  val startIdx = bindex[START_SYMBOL]

  // Suspends once every `freq` invocations (including the very first one, since `i` starts at 0).
  // NOTE(review): kotlinx.coroutines appears to round a positive sub-millisecond delay up to a
  // whole millisecond on most dispatchers - confirm the intended pause length.
  suspend fun pause(freq: Int = 300_000) { if (i++ % freq == 0) { delay(50.nanoseconds) }}

  // Boolean recognizer over `levFSA`: returns the smallest radius value computed from the coordinates
  // of any final state reachable from state 0 via the start symbol, or null if there is none.
  suspend fun nonemptyLevInt(levFSA: FSA): Int? {
    val ap: List<List<List<Int>?>> = levFSA.allPairs
    val dp = Array(levFSA.numStates) { Array(levFSA.numStates) { BooleanArray(width) { false } } }

    levFSA.allIndexedTxs0(ups, bindex).forEach { (q0, nt, q1) -> dp[q0][q1][nt] = true }
    var minRad: Int = Int.MAX_VALUE

    // For pairs (p,q) in topological order
    for (dist: Int in 1..<dp.size) {
      for (p: Int in 0..<dp.size - dist) {
        val q = p + dist
        // Pairs without an entry in the all-pairs table are skipped.
        val appq = ap[p][q] ?: continue
        for ((A: Int, indexArray: IntArray) in vindex.withIndex()) {
          pause()
          // indexArray holds the binary right-hand sides of A as flattened (B, C) pairs.
          outerloop@for (j: Int in 0..<indexArray.size step 2) {
            val B = indexArray[j]
            val C = indexArray[j + 1]
            for (r in appq)
              if (dp[p][r][B] && dp[r][q][C]) {
                dp[p][q][A] = true
                break@outerloop
              }
          }

          if (p == 0 && A == startIdx && q in levFSA.finalIdxs && dp[p][q][A]) {
            // `!!` kept deliberately: a final state without coordinates fails exactly as before.
            val (x, y) = levFSA.idsToCoords[q]!!
            /** See final state conditions for [makeExactLevCFL] */
            // The minimum radius such that this final state is included in the L-FSA
            minRad = minOf(minRad, (brokenStr.size - x + y).absoluteValue)
          }
        }
      }
    }

    return if (minRad == Int.MAX_VALUE) null else minRad
  }

  val led = (3..<upperBound)
    .firstNotNullOfOrNull { nonemptyLevInt(makeLevFSA(brokenStr, it)) } ?:
    upperBound.also { println("Hit upper bound") }
  val radius = led + LED_BUFFER

  println("Identified LED=$led, radius=$radius in ${timer.elapsedNow()}")

  val levFSA = makeLevFSA(brokenStr, radius)

  val nStates = levFSA.numStates
  val tms = cfg.tmLst.size
  val tmm = cfg.tmMap

  // 1) Create dp array of parse trees
  val dp: Array<Array<Array<GRE?>>> = Array(nStates) { Array(nStates) { Array(width) { null } } }

  // 2) Initialize terminal productions A -> a
  for ((p, σ, q) in levFSA.allIndexedTxs1(ups)) {
    // Looked up once per transition; `!!` kept so an unmapped terminal fails exactly as before.
    val tmIdx = tmm[σ]!!
    for (Aidx in t2vs[tmIdx]) {
      pause()
      // Reuse the terminal set already stored in this cell, otherwise start a fresh one.
      val terminals = (dp[p][q][Aidx] as? GRE.SET) ?: GRE.SET(tms)
      terminals.s.set(tmIdx)
      dp[p][q][Aidx] = terminals
    }
  }

  // Diagnostics only: the widest union built and the (p, q) cell where it occurred.
  var maxChildren = 0
  var location = -1 to -1

  // 3) CYK + Floyd Warshall parsing
  val allPairs = levFSA.allPairs
  for (dist in 1 until nStates) {
    for (p in 0 until (nStates - dist)) {
      val q = p + dist
      val appq = allPairs[p][q] ?: continue

      for ((Aidx, indexArray) in vindex.withIndex()) {
        // Start from whatever is already stored for A at (p, q), e.g. a terminal set from step 2.
        val rhsPairs: MutableList<GRE> = listOfNotNull(dp[p][q][Aidx]).toMutableList()
        for (j in 0..<indexArray.size step 2) {
          pause()
          val Bidx = indexArray[j]
          val Cidx = indexArray[j + 1]
          for (r in appq) {
            val left = dp[p][r][Bidx] ?: continue
            val right = dp[r][q][Cidx] ?: continue
            // Found a parse for A
            rhsPairs += left * right
          }
        }

        // Nothing derivable for A between p and q: leave the cell untouched.
        if (rhsPairs.isEmpty()) continue

        if (rhsPairs.size > maxChildren) {
          maxChildren = rhsPairs.size
          location = p to q
        }
        // A single alternative is stored as-is; several are wrapped in a union.
        dp[p][q][Aidx] = rhsPairs.singleOrNull() ?: GRE.CUP(*rhsPairs.toTypedArray())
      }
    }
  }

  println("Completed parse matrix in: ${timer.elapsedNow()}")

  // 4) Gather final parse trees from dp[0][f][startIdx], for all final states f
  val allParses = levFSA.finalIdxs.mapNotNull { q -> dp[0][q][startIdx] }

  println("Parsing took ${timer.elapsedNow()} with |σ|=${brokenStr.size}, " +
    "|Q|=$nStates, |G|=${cfg.size}, maxBranch=$maxBranch, |V|=$width, |Σ|=$tms, maxChildren=$maxChildren@$location")
  // 5) Combine them under a single GRE
  return if (allParses.isEmpty()) null else GRE.CUP(*allParses.toTypedArray())
}

src/jvmTest/kotlin/ai/hypergraph/kaliningraph/repair/ProbabilisticLBH.kt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package ai.hypergraph.kaliningraph.repair
22

33
import Grammars
4+
import ai.hypergraph.kaliningraph.automata.GRE
45
import ai.hypergraph.kaliningraph.automata.repairWithGRE
56
import ai.hypergraph.kaliningraph.automata.toDFA
67
import ai.hypergraph.kaliningraph.parsing.*
@@ -469,7 +470,7 @@ class ProbabilisticLBH {
469470
val ds = Grammars.dsNorm
470471
val la = makeLevFSA(prompt.tokenizeByWhitespace(), 4)
471472

472-
val gre: GRE = repairWithGRE(prompt.tokenizeByWhitespace(), ds)!!
473+
// val gre: GRE = repairWithGRE(prompt.tokenizeByWhitespace(), ds)!!
473474
// gre.showEditable()
474475

475476
// println(la.stateLst)

0 commit comments

Comments
 (0)