Skip to content

Commit 7c5e3ee

Browse files
committed
Merge remote-tracking branch 'origin/master'
2 parents c0cd41d + f006384 commit 7c5e3ee

File tree

10 files changed

+138
-233
lines changed

10 files changed

+138
-233
lines changed

src/commonMain/kotlin/ai/hypergraph/kaliningraph/CommonUtils.kt

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,8 @@ infix fun Collection<Any>.anyAre(that: Any) = any { it isA that }
129129
* Minimal pure-Kotlin bit set for indices [0..n-1].
130130
*/
131131
class KBitSet(private val n: Int) {
132+
constructor(n: Int, v: Int) : this(n) { set(v) }
133+
constructor(n: Int, v: Collection<Int>) : this(n) { v.forEach { set(it) } }
132134
// Each element of 'data' holds 64 bits, covering up to n bits total.
133135
private val data = LongArray((n + 63) ushr 6)
134136

@@ -154,8 +156,8 @@ class KBitSet(private val n: Int) {
154156
for (i in data.indices) data[i] = data[i] and other.data[i]
155157
}
156158

157-
fun toSet(): Set<Int> {
158-
val result = mutableSetOf<Int>()
159+
fun toList(): List<Int> {
160+
val result = mutableListOf<Int>()
159161
for (i in 0 until n) if (get(i)) result.add(i)
160162
return result
161163
}

src/commonMain/kotlin/ai/hypergraph/kaliningraph/automata/AFSA.kt

Lines changed: 13 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -49,9 +49,13 @@ class AFSA(override val Q: TSA, override val init: Set<Σᐩ>, override val fina
4949
return result
5050
}
5151

52-
// Since the FSA is acyclic, we can use a more efficient topological ordering
52+
/** See [FSA.intersectPTree] for why this is needed. */
5353
override val stateLst by lazy {
54-
topSort()
54+
// Since the FSA is acyclic, we can use a more efficient topsort -
55+
// This trick will only work for Levenshtein FSAs (otherwise use topSort())
56+
states.groupBy { it.coords().let { (a, b) -> a + b } }.values.flatten()
57+
58+
// topSort()
5559
// .also {
5660
// if (it.size != states.size)
5761
// throw Exception("Contained ${states.size} but ${it.size} topsorted indices:\n" +
@@ -64,7 +68,7 @@ class AFSA(override val Q: TSA, override val init: Set<Σᐩ>, override val fina
6468
}
6569

6670
// Assumes stateLst is already in topological order:
67-
override val allPairs: Map<Pair<Int, Int>, Set<Int>> by lazy {
71+
override val allPairs: List<List<List<Int>?>> by lazy {
6872
val fwdAdj = Array(numStates) { mutableListOf<Int>() }
6973
val revAdj = Array(numStates) { mutableListOf<Int>() }
7074

@@ -96,34 +100,14 @@ class AFSA(override val Q: TSA, override val init: Set<Σᐩ>, override val fina
96100
// i.e. if post[i].get(j) == false => empty set.
97101
//
98102
// A fresh KBitSet is allocated for each reachable pair (i, j):
99-
val result = mutableMapOf<Pair<Int, Int>, Set<Int>>()
103+
val result: List<MutableList<List<Int>?>> = List(states.size) { MutableList(states.size) { null } }
100104

101-
for (i in 0 until numStates) {
102-
for (j in i until numStates) {
103-
when {
104-
i == j -> {
105-
// The trivial path i->i has just i on it (assuming zero-length path is allowed).
106-
result[i to i] = emptySet()
107-
}
108-
!post[i].get(j) -> {
109-
// i < j, but j is not actually reachable from i
110-
result[i to j] = emptySet()
111-
// In a DAG, j->i is definitely unreachable if j > i, so:
112-
result[j to i] = emptySet()
113-
}
114-
else -> {
115-
// i < j and j is reachable from i => do the intersection of post[i] & pre[j].
116-
val tmp = KBitSet(numStates)
117-
tmp.or(post[i])
118-
tmp.and(pre[j])
119-
result[i to j] = tmp.toSet()
120-
121-
// j>i => definitely unreachable for j->i in a DAG
122-
result[j to i] = emptySet()
123-
}
124-
}
105+
for (i in 0 until numStates) for (j in i + 1 until numStates)
106+
when {
107+
!post[i].get(j) -> { }
108+
// i < j and j is reachable from i => do the intersection of post[i] & pre[j].
109+
else -> result[i][j] = KBitSet(numStates).apply { or(post[i]); and(pre[j]) }.toList()
125110
}
126-
}
127111

128112
result
129113
}

src/commonMain/kotlin/ai/hypergraph/kaliningraph/automata/FSA.kt

Lines changed: 28 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -34,13 +34,19 @@ open class FSA constructor(open val Q: TSA, open val init: Set<Σᐩ>, open val
3434
val states: Set<Σᐩ> by lazy { Q.states() }
3535
open val stateLst: List<Σᐩ> by lazy { states.toList() }
3636

37-
fun allIndexedTxs1(cfg: CFG): List<Π3<Int, Σᐩ, Int>> =
38-
(cfg.unitProductions * nominalForm.flattenedTriples).filter { (_, σ: Σᐩ, arc) -> (arc.π2)(σ) }
39-
.map { (A: Σᐩ, σ: Σᐩ, arc) -> Triple(stateMap[arc.π1]!!, σ, stateMap[arc.π3]!!) }
37+
/**
 * Collects indexed transitions for the given unit productions.
 *
 * For every (nonterminal, σ) pair in [unitProds] and every arc in
 * `nominalForm.flattenedTriples` whose predicate `π2` accepts σ, emits
 * (index of the arc's source state, σ, index of the arc's target state).
 * Results keep the iteration order: unit productions outermost, arcs innermost.
 */
fun allIndexedTxs1(unitProds: Set<Π2A<Σᐩ>>): List<Π3<Int, Σᐩ, Int>> =
  unitProds.flatMap { (_, σ) ->
    nominalForm.flattenedTriples.mapNotNull { arc ->
      // State indices are only looked up for arcs whose predicate accepts σ.
      if (arc.π2(σ)) Triple(stateMap[arc.π1]!!, σ, stateMap[arc.π3]!!) else null
    }
  }
4043

41-
fun allIndexedTxs0(cfg: CFG): List<Π3A<Int>> =
42-
(cfg.unitProductions * nominalForm.flattenedTriples).filter { (_, σ: Σᐩ, arc) -> (arc.π2)(σ) }
43-
.map { (A: Σᐩ, _, arc) -> Triple(stateMap[arc.π1]!!, cfg.bindex[A], stateMap[arc.π3]!!) }
44+
/**
 * Collects fully integer-indexed transitions for the given unit productions.
 *
 * For every (A, σ) pair in [unitProds] and every arc in
 * `nominalForm.flattenedTriples` whose predicate `π2` accepts σ, emits
 * (index of the arc's source state, `bindex[A]`, index of the arc's target state).
 * Results keep the iteration order: unit productions outermost, arcs innermost.
 */
fun allIndexedTxs0(unitProds: Set<Π2A<Σᐩ>>, bindex: Bindex<Σᐩ>): List<Π3A<Int>> =
  unitProds.flatMap { (A, σ) ->
    nominalForm.flattenedTriples.mapNotNull { arc ->
      // Indices are only looked up for arcs whose predicate accepts σ.
      if (arc.π2(σ)) Triple(stateMap[arc.π1]!!, bindex[A], stateMap[arc.π3]!!) else null
    }
  }
4450

4551
val numStates: Int by lazy { states.size }
4652

@@ -54,10 +60,14 @@ open class FSA constructor(open val Q: TSA, open val init: Set<Σᐩ>, open val
5460
}
5561

5662
// TODO: there should be a way to compute this on the fly for L-automata (basically a Cartesian grid)
57-
open val allPairs: Map<Pair<Int, Int>, Set<Int>> by lazy {
58-
graph.allPairs.entries.associate { (a, b) ->
59-
Pair(Pair(stateMap[a.first.label]!!, stateMap[a.second.label]!!), b.map { stateMap[it.label]!! }.toSet())
63+
open val allPairs: List<List<List<Int>?>> by lazy {
64+
val aps: List<MutableList<MutableList<Int>?>> =
65+
List(states.size) { MutableList(states.size) { null } }
66+
graph.allPairs.entries.forEach { (a, b) ->
67+
val temp = b.map { stateMap[it.label]!! }.toMutableList()
68+
aps[stateMap[a.first.label]!!][stateMap[a.second.label]!!] = temp
6069
}
70+
aps
6171
}
6272

6373
val finalIdxs by lazy { final.map { stateMap[it]!! } }
@@ -122,10 +132,11 @@ open class FSA constructor(open val Q: TSA, open val init: Set<Σᐩ>, open val
122132
val bindex = cfg.bindex
123133
val width = cfg.nonterminals.size
124134
val vindex = cfg.vindex
125-
val ap: Map<Pair<Int, Int>, Set<Int>> = levFSA.allPairs
135+
val ups = cfg.unitProductions
136+
val aps: List<List<List<Int>?>> = levFSA.allPairs
126137
val dp = Array(levFSA.numStates) { Array(levFSA.numStates) { BooleanArray(width) { false } } }
127138

128-
levFSA.allIndexedTxs0(cfg).forEach { (q0, nt, q1) -> dp[q0][q1][nt] = true }
139+
levFSA.allIndexedTxs0(ups, bindex).forEach { (q0, nt, q1) -> dp[q0][q1][nt] = true }
129140

130141
val startIdx = bindex[START_SYMBOL]
131142

@@ -134,8 +145,8 @@ open class FSA constructor(open val Q: TSA, open val init: Set<Σᐩ>, open val
134145
for (iP in 0 until levFSA.numStates - dist) {
135146
val p = iP
136147
val q = iP + dist
137-
if (p to q !in ap) continue
138-
val appq = ap[p to q]!!
148+
if (aps[p][q] == null) continue
149+
val appq = aps[p][q]!!
139150
for ((A, indexArray) in vindex.withIndex()) {
140151
outerloop@for(j in 0..<indexArray.size step 2) {
141152
val B = indexArray[j]
@@ -174,6 +185,7 @@ open class FSA constructor(open val Q: TSA, open val init: Set<Σᐩ>, open val
174185
val bimap = cfg.bimap
175186
val width = cfg.nonterminals.size
176187
val vindex = cfg.vindex
188+
val ups = cfg.unitProductions
177189

178190
val nStates = levFSA.numStates
179191
val startIdx = bindex[START_SYMBOL]
@@ -182,7 +194,7 @@ open class FSA constructor(open val Q: TSA, open val init: Set<Σᐩ>, open val
182194
val dp: Array<Array<Array<PTree?>>> = Array(nStates) { Array(nStates) { Array(width) { null } } }
183195

184196
// 2) Initialize terminal productions A -> a
185-
val aitx = levFSA.allIndexedTxs1(cfg)
197+
val aitx = levFSA.allIndexedTxs1(ups)
186198
for ((p, σ, q) in aitx) {
187199
val Aidxs = bimap.TDEPS[σ]!!.map { bindex[it] }
188200
for (Aidx in Aidxs) {
@@ -195,8 +207,8 @@ open class FSA constructor(open val Q: TSA, open val init: Set<Σᐩ>, open val
195207
for (dist in 0 until nStates) {
196208
for (p in 0 until (nStates - dist)) {
197209
val q = p + dist
198-
if (p to q !in levFSA.allPairs) continue
199-
val appq = levFSA.allPairs[p to q]!!
210+
if (levFSA.allPairs[p][q] == null) continue
211+
val appq = levFSA.allPairs[p][q]!!
200212
for ((Aidx, indexArray) in vindex.withIndex()) {
201213
val rhsPairs = dp[p][q][Aidx]?.branches?.toMutableList() ?: mutableListOf()
202214
outerLoop@for (j in 0..<indexArray.size step 2) {
Lines changed: 63 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,47 +1,85 @@
11
package ai.hypergraph.kaliningraph.automata
22

3+
import ai.hypergraph.kaliningraph.KBitSet
34
import ai.hypergraph.kaliningraph.parsing.*
45
import ai.hypergraph.kaliningraph.tensor.UTMatrix
6+
import ai.hypergraph.kaliningraph.tokenizeByWhitespace
57
import ai.hypergraph.kaliningraph.types.*
68

79
// Generalized regular expression: https://planetmath.org/generalizedregularexpression
810
// Parsing with derivatives: https://matt.might.net/papers/might2011derivatives.pdf
9-
sealed class GRE(vararg val args: GRE) {
10-
companion object { operator fun invoke(s: Σᐩ) = ONE(s) }
11-
11+
sealed class GRE(open vararg val args: GRE) {
1212
class EPS: GRE()
13-
class ONE(val s: Σᐩ): GRE()
14-
class SET(val s: Set<Σᐩ>): GRE()
15-
class NEG(val g: GRE): GRE(g)
16-
class UNI(val l: GRE, val r: GRE): GRE(l, r)
13+
class SET(val s: KBitSet): GRE() { constructor(size: Int): this(KBitSet(size)) }
14+
class UNI(override vararg val args: GRE): GRE(*args)
1715
class CAT(val l: GRE, val r: GRE): GRE(l, r)
18-
class INT(val l: GRE, val r: GRE): GRE(l, r)
1916

20-
infix fun and(a: GRE): GRE = INT(this, a)
17+
/**
 * Renders every distinct index list produced by [enumerate] as a string:
 * each index is looked up in [terminals], entries equal to "ε" are skipped,
 * and the remaining symbols are joined with single spaces.
 */
fun words(terminals: List<Σᐩ>): Sequence<Σᐩ> =
  enumerate().distinct().map { indices ->
    indices
      .map { idx -> terminals[idx] }
      .filter { symbol -> symbol != "ε" }
      .joinToString(" ")
  }
19+
20+
// F_s(g) = { s | ∂_s(g) != ∅ }
21+
// fun GRE.followSet(): KBitSet = when (this) {
22+
// is EPS -> KBitSet()
23+
// is SET -> s
24+
// is UNI -> args.map { it.followSet() }.fold (KBitSet()) { a, b -> a or b }
25+
// is CAT -> l.followSet()
26+
// }
27+
28+
/**
 * Lazily enumerates the words of this expression as lists of terminal indices.
 *
 * - [EPS] yields exactly one word: the empty list.
 * - [SET] yields one single-index word per element reported by the bit set's `toList()`.
 * - [UNI] yields the words of each argument in order; duplicates are not removed here.
 * - [CAT] yields, for every left word, that word followed by every right word.
 */
fun enumerate(): Sequence<List<Int>> = sequence {
  when (this@GRE) {
    // The empty word has to be yielded explicitly; a bare `emptyList()` expression
    // in statement position is evaluated and thrown away, producing no words at all.
    is EPS -> yield(emptyList())
    is SET -> yieldAll(s.toList().map { listOf(it) })
    is UNI -> for (a in args) yieldAll(a.enumerate())
    // List concatenation with an empty side is the identity, so every (lhs, rhs)
    // combination is emitted uniformly, including an empty lhs with a non-empty rhs.
    is CAT -> for (lhs in l.enumerate()) for (rhs in r.enumerate()) yield(lhs + rhs)
  }
}
43+
44+
// ∂_s(g) = { w | s·w ∈ L(g) }
45+
// fun dv(s: Σᐩ): GRE? = when (this) {
46+
// is EPS -> null // ∂_s(ε) = ∅
47+
// is SET -> if (s in this.s) EPS() else NIL
48+
// is UNI -> args.reduce { a, b -> a + b }
49+
// is CAT -> {
50+
// // ∂_s(E1 · E2) = (∂_s(E1)) · E2 ∪ [if E1 nullable => ∂_s(E2)]
51+
// val dLeft = l.dv(s) * r
52+
// if (l.nullable()) dLeft + r.dv(s) else dLeft
53+
// }
54+
// }
55+
56+
/**
 * Reports whether this expression accepts the empty string ε.
 *
 * [EPS] always does, [SET] never does, a [UNI] does when any argument does,
 * and a [CAT] does only when both of its sides do.
 */
fun nullable(): Boolean {
  return when (this) {
    is SET -> false
    is EPS -> true
    is CAT -> l.nullable() && r.nullable()
    is UNI -> args.any { arg -> arg.nullable() }
  }
}
63+
2164
operator fun plus(g: GRE): GRE = UNI(this, g)
2265
operator fun times(g: GRE): GRE = CAT(this, g)
23-
operator fun not(): GRE = NEG(this)
2466

25-
override fun toString(): String = when (this) {
26-
is ONE -> s
27-
is SET -> "( ${s.joinToString(" ")} )"
28-
is NEG -> "! ( $g )"
29-
is UNI -> "( $l$r )"
30-
is CAT -> "$l $r"
31-
is INT -> "$l$r"
32-
is EPS -> "ε"
33-
}
67+
// override fun toString() = when (this) {
68+
// is EPS -> "ε"
69+
// is SET -> if (s.isEmpty()) "∅" else "( ${s.joinToString(" ")} )"
70+
// is UNI -> "( ${args.joinToString(" ∪ "){ "$it" }} )"
71+
// is CAT -> "$l $r"
72+
// }
3473
}
3574

36-
37-
fun CFG.initGREListMat(tokens: List<String>): UTMatrix<List<GRE?>> =
75+
fun CFG.initGREListMat(tokens: List<Σᐩ>): UTMatrix<List<GRE?>> =
3876
UTMatrix(
3977
ts = tokens.map { token ->
4078
val ptreeList = MutableList<GRE?>(nonterminals.size) { null }
4179
(if (token != HOLE_MARKER) bimap[listOf(token)] else unitNonterminals)
4280
.associateWith { nt ->
43-
if (token != HOLE_MARKER) GRE.ONE(token)
44-
else bimap.UNITS[nt]?.let { GRE.SET(it) }
81+
if (token != HOLE_MARKER) GRE.SET(KBitSet(terminals.size, tmMap[token]!!))
82+
else bimap.UNITS[nt]?.let { GRE.SET(KBitSet(tmLst.size, it.map { tmMap[it]!! })) }
4583
}.forEach { (k, v) -> ptreeList[bindex[k]] = v }
4684
ptreeList
4785
}.toTypedArray(),
@@ -66,6 +104,5 @@ fun CFG.greJoin(left: List<GRE?>, right: List<GRE?>): List<GRE?> = vindex2.map {
66104
if (t.isEmpty()) null else t.reduce { acc, int -> if (acc == null) int else if (int == null) acc else acc + int }
67105
}
68106

69-
fun CFG.startGRE(tokens: List<String>): GRE? =
70-
initGREListMat(tokens).seekFixpoint().diagonals.last()[0][bindex[START_SYMBOL]]
71-
107+
fun CFG.startGRE(tokens: List<Σᐩ>): GRE? =
108+
initGREListMat(tokens).seekFixpoint().diagonals.last()[0][bindex[START_SYMBOL]]

src/commonMain/kotlin/ai/hypergraph/kaliningraph/cache/LRUCache.kt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ class LRUCache<K, V>(
66
private val sizeOf: (key: K, value: V) -> Int = { _, _ -> 1 }
77
) {
88
val map: LinkedHashMap<K, V> = LinkedHashMap(0, .75f)
9-
private var size: Int = 0
9+
var size: Int = 0
1010

1111
fun getOrPut(key: K, value: () -> V): V =
1212
map[key] ?: value().also { put(key, it) }

src/commonMain/kotlin/ai/hypergraph/kaliningraph/parsing/CFG.kt

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import kotlin.jvm.JvmName
99
import kotlin.random.Random
1010
import kotlin.time.*
1111
import kotlin.time.Duration.Companion.seconds
12+
import kotlin.to
1213

1314
typealias Σᐩ = String
1415
typealias Production = Π2<Σᐩ, List<Σᐩ>>
@@ -60,7 +61,10 @@ val CFG.unicodeMap by cache { terminals.associateBy { Random(it.hashCode()).next
6061
val CFG.ntLst by cache { (symbols + "ε").toList() }
6162
val CFG.ntMap by cache { ntLst.mapIndexed { i, s -> s to i }.toMap() }
6263

63-
val CFG.tripleIntProds: Set<Π3A<Int>> by cache { bimap.TRIPL.map { (a, b, c) -> bindex[a] to bindex[b] to bindex[c] }.toSet() }
64+
val CFG.tmLst by cache { terminals.toList() }
65+
val CFG.tmMap by cache { tmLst.mapIndexed { i, s -> s to i }.toMap() }
66+
67+
val CFG.tripleIntProds: Set<Π3A<Int>> by cache { bimap.TRIPL.map { (a, b, c) -> Triple(bindex[a], bindex[b], bindex[c]) }.toSet() }
6468
val CFG.revUnitProds: Map<Σᐩ, List<Int>> by cache { terminals.associate { it to bimap[listOf(it)].map { bindex[it] } } }
6569

6670
// Maps each nonterminal to the set of nonterminal pairs that can generate it,
@@ -272,7 +276,7 @@ class BiMap(val cfg: CFG) {
272276
}
273277
val TRIPL: List<Π3A<Σᐩ>> by lazy {
274278
R2LHS.filter { it.key.size == 2 }
275-
.map { it.value.map { v -> v to it.key[0] to it.key[1] } }.flatten()
279+
.map { it.value.map { v -> Triple(v, it.key[0], it.key[1]) } }.flatten()
276280
}
277281
val X2WZ: Map<Σᐩ, List<Π3A<Σᐩ>>> by lazy {
278282
TRIPL.groupBy { it.second }.mapValues { it.value }

0 commit comments

Comments
 (0)