@@ -2,14 +2,12 @@ package ai.hypergraph.kaliningraph.automata
22
33import ai.hypergraph.kaliningraph.KBitSet
44import ai.hypergraph.kaliningraph.parsing.*
5- import ai.hypergraph.kaliningraph.repair.LED_BUFFER
6- import ai.hypergraph.kaliningraph.repair.MAX_RADIUS
7- import ai.hypergraph.kaliningraph.sampling.randomString
5+ import ai.hypergraph.kaliningraph.repair.*
86import ai.hypergraph.kaliningraph.tensor.UTMatrix
97import ai.hypergraph.kaliningraph.types.*
10- import kotlin.collections.plus
11- import kotlin.math.absoluteValue
12- import kotlin.math.max
8+ import kotlinx.coroutines.delay
9+ import kotlin.math.*
10+ import kotlin.time.Duration.Companion.nanoseconds
1311import kotlin.time.TimeSource
1412
1513// Generalized regular expression: https://planetmath.org/generalizedregularexpression
@@ -374,7 +372,138 @@ fun repairWithGRE(brokenStr: List<Σᐩ>, cfg: CFG): GRE? {
374372 // 4) Gather final parse trees from dp[0][f][startIdx], for all final states f
375373 val allParses = levFSA.finalIdxs.mapNotNull { q -> dp[0 ][q][startIdx] }
376374
377- val clock = TimeSource .Monotonic .markNow()
378375 // 5) Combine under a single GRE
379376 return if (allParses.isEmpty()) null else GRE .CUP (* allParses.toTypedArray())
377+ }
378+
/**
 * Runs the GRE-based repair for [brokenStr] under [cfg] and enumerates the resulting words.
 *
 * @param brokenStr the token sequence to repair
 * @param cfg the grammar whose terminal list ([CFG.tmLst]) is used to decode the words
 * @return the words of the GRE built by [repairWithGRE], or an empty sequence when it returns `null`
 */
fun initiateSerialRepair(brokenStr: List<Σᐩ>, cfg: CFG): Sequence<Σᐩ> {
  // No GRE means there is nothing to enumerate.
  val repairs = repairWithGRE(brokenStr, cfg) ?: return emptySequence()
  return repairs.words(cfg.tmLst)
}
381+
/**
 * Same as serial repair, but with strategic pauses to prevent stuttering on single-threaded runtimes.
 *
 * The work happens in two phases:
 *  1. For radii `3 ..< MAX_RADIUS * 3`, build a Levenshtein FSA around [brokenStr] and run a Boolean
 *     CYK-style recognizer over it ([nonemptyLevInt]) until one reports a reachable final state.
 *     The reported value (or the upper bound, if none is found) plus [LED_BUFFER] becomes the radius.
 *  2. Build the FSA at that radius and fill a table of [GRE] parse forests, then union the entries
 *     found at `dp[0][f][start]` for every final state `f`.
 *
 * Both phases call a local `pause()` helper inside their hot loops so the coroutine periodically suspends.
 *
 * @param brokenStr the token sequence to repair
 * @param cfg the grammar to intersect with the Levenshtein automaton
 * @return a [GRE.CUP] over all parses found at final states, or `null` when there are none
 */
suspend fun initiateSuspendableRepair(brokenStr: List<Σᐩ>, cfg: CFG): GRE? {
  var i = 0
  val upperBound = MAX_RADIUS * 3
  // val monoEditBounds = cfg.maxParsableFragmentB(brokenStr, pad = upperBound)
  val timer = TimeSource.Monotonic.markNow()
  val bindex = cfg.bindex
  val width = cfg.nonterminals.size
  val vindex = cfg.vindex
  val ups = cfg.unitProductions
  val t2vs = cfg.tmToVidx
  val maxBranch = vindex.maxOf { it.size }
  val startIdx = bindex[START_SYMBOL]

  // Suspends once every [freq] invocations (including the very first one, since the counter starts at 0).
  // NOTE(review): kotlinx.coroutines' delay has millisecond granularity, so a 50 ns request presumably
  // suspends for about 1 ms rather than 50 ns — confirm against the delay(Duration) documentation.
  suspend fun pause(freq: Int = 300_000) { if (i++ % freq == 0) delay(50.nanoseconds) }

  // Boolean recognizer over [levFSA]: returns the smallest value of |brokenStr.size - x + y| over all
  // final states (x, y) that derive the start symbol from state 0, or null if no final state does.
  suspend fun nonemptyLevInt(levFSA: FSA): Int? {
    val ap: List<List<List<Int>?>> = levFSA.allPairs
    val dp = Array(levFSA.numStates) { Array(levFSA.numStates) { BooleanArray(width) } }

    levFSA.allIndexedTxs0(ups, bindex).forEach { (q0, nt, q1) -> dp[q0][q1][nt] = true }
    var minRad: Int = Int.MAX_VALUE

    // For pairs (p,q) in topological order
    for (dist: Int in 1..<dp.size) {
      for (p: Int in 0..<dp.size - dist) {
        val q = p + dist
        // A null entry means there are no intermediate states to split on for this pair.
        val appq = ap[p][q] ?: continue
        for ((A: Int, indexArray: IntArray) in vindex.withIndex()) {
          pause()
          // indexArray is read as consecutive (B, C) pairs, i.e. the binary right-hand sides of A.
          outerloop@ for (j: Int in 0..<indexArray.size step 2) {
            val B = indexArray[j]
            val C = indexArray[j + 1]
            for (r in appq)
              if (dp[p][r][B] && dp[r][q][C]) {
                dp[p][q][A] = true
                // One witness is enough for recognition; stop scanning the remaining productions of A.
                break@outerloop
              }
          }

          if (p == 0 && A == startIdx && q in levFSA.finalIdxs && dp[p][q][A]) {
            val (x, y) = levFSA.idsToCoords[q]!!
            /** See final state conditions for [makeExactLevCFL] */
            // The minimum radius such that this final state is included in the L-FSA
            minRad = minOf(minRad, (brokenStr.size - x + y).absoluteValue)
          }
        }
      }
    }

    return if (minRad == Int.MAX_VALUE) null else minRad
  }

  val led = (3..<upperBound)
    .firstNotNullOfOrNull { nonemptyLevInt(makeLevFSA(brokenStr, it)) }
    ?: upperBound.also { println("Hit upper bound") }
  val radius = led + LED_BUFFER

  println("Identified LED = $led, radius= $radius in ${timer.elapsedNow()}")

  val levFSA = makeLevFSA(brokenStr, radius)

  val nStates = levFSA.numStates
  val tms = cfg.tmLst.size
  val tmm = cfg.tmMap

  // 1) Create dp array of parse trees
  val dp: Array<Array<Array<GRE?>>> = Array(nStates) { Array(nStates) { Array(width) { null } } }

  // 2) Initialize terminal productions A -> a
  val aitx = levFSA.allIndexedTxs1(ups)
  for ((p, σ, q) in aitx) {
    // Look the terminal index up once per transition instead of once per use.
    val tmIdx = tmm[σ]!!
    for (Aidx in t2vs[tmIdx]) {
      // Reuse the existing terminal set for this cell if there is one, otherwise start a fresh one.
      val terminals = (dp[p][q][Aidx] as? GRE.SET) ?: GRE.SET(tms)
      pause()
      terminals.s.set(tmIdx) /* ; dq[p][q].set(Aidx) */
      dp[p][q][Aidx] = terminals
    }
  }

  // Bookkeeping for the diagnostic line printed at the end: largest union built and where.
  var maxChildren = 0
  var location = -1 to -1

  // 3) CYK + Floyd Warshall parsing
  val allPairs = levFSA.allPairs
  for (dist in 1 until nStates) {
    for (p in 0 until (nStates - dist)) {
      val q = p + dist
      val appq = allPairs[p][q] ?: continue

      for ((Aidx, indexArray) in vindex.withIndex()) {
        // println("${cfg.bindex[Aidx]}(${pm!!.ntLengthBounds[Aidx]}):${levFSA.stateLst[p]}-${levFSA.stateLst[q]}(${levFSA.SPLP(p, q)})")
        // Start from whatever is already stored in the cell (e.g. a terminal set from step 2).
        val rhsPairs = dp[p][q][Aidx]?.let { mutableListOf(it) } ?: mutableListOf()
        outerLoop@ for (j in 0..<indexArray.size step 2) {
          pause()
          val Bidx = indexArray[j]
          val Cidx = indexArray[j + 1]
          for (r in appq) {
            val left = dp[p][r][Bidx] ?: continue
            val right = dp[r][q][Cidx] ?: continue
            // Found a parse for A
            rhsPairs += left * right
            // if (rhsPairs.size > 10) break@outerLoop
          }
        }

        if (rhsPairs.isNotEmpty()) {
          if (rhsPairs.size > maxChildren) {
            maxChildren = rhsPairs.size
            location = p to q
          }
          // A single alternative is stored as-is; the array is only materialized when a union is needed.
          dp[p][q][Aidx] = if (rhsPairs.size == 1) rhsPairs.first() else GRE.CUP(*rhsPairs.toTypedArray())
        }
      }
    }
  }

  println(" Completed parse matrix in: ${timer.elapsedNow()} ")

  // 4) Gather final parse trees from dp[0][f][startIdx], for all final states f
  val allParses = levFSA.finalIdxs.mapNotNull { q -> dp[0][q][startIdx] }

  println(" Parsing took ${timer.elapsedNow()} with |σ|=${brokenStr.size} , " +
    " |Q|=$nStates , |G|=${cfg.size} , maxBranch=$maxBranch , |V|=$width , |Σ|=$tms , maxChildren=$maxChildren @$location ")
  // 5) Combine them under a single GRE
  return if (allParses.isEmpty()) null else GRE.CUP(*allParses.toTypedArray())
}
0 commit comments