Skip to content

Commit 761aebe

Browse files
committed
Add Archives of the Empire 3 compendium import
1 parent f54f9a8 commit 761aebe

File tree

7 files changed

+218
-7
lines changed

7 files changed

+218
-7
lines changed
Lines changed: 182 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,182 @@
1+
package cz.frantisekmasa.wfrp_master.common.compendium.domain.importer.books
2+
3+
import cz.frantisekmasa.wfrp_master.common.compendium.domain.Career
4+
import cz.frantisekmasa.wfrp_master.common.compendium.domain.JournalEntry
5+
import cz.frantisekmasa.wfrp_master.common.compendium.domain.Trapping
6+
import cz.frantisekmasa.wfrp_master.common.compendium.domain.importer.parsers.Document
7+
import cz.frantisekmasa.wfrp_master.common.compendium.domain.importer.parsers.Lexer
8+
import cz.frantisekmasa.wfrp_master.common.compendium.domain.importer.parsers.RulesParser
9+
import cz.frantisekmasa.wfrp_master.common.compendium.domain.importer.parsers.TextPosition
10+
import cz.frantisekmasa.wfrp_master.common.compendium.domain.importer.parsers.TextToken
11+
import cz.frantisekmasa.wfrp_master.common.compendium.domain.importer.parsers.Token
12+
import cz.frantisekmasa.wfrp_master.common.compendium.domain.importer.parsers.TokenStream
13+
import cz.frantisekmasa.wfrp_master.common.compendium.domain.importer.parsers.TwoColumnPdfLexer
14+
import cz.frantisekmasa.wfrp_master.common.compendium.domain.importer.parsers.trappings.ArmourParser
15+
import cz.frantisekmasa.wfrp_master.common.compendium.domain.importer.parsers.trappings.description.HeadingDescriptionParser
16+
import cz.frantisekmasa.wfrp_master.common.compendium.domain.importer.sources.CareerSource
17+
import cz.frantisekmasa.wfrp_master.common.compendium.domain.importer.sources.JournalEntrySource
18+
import cz.frantisekmasa.wfrp_master.common.compendium.domain.importer.sources.TrappingSource
19+
import kotlin.math.max
20+
import kotlin.math.min
21+
22+
/**
 * Compendium import definition for the "Archives of the Empire — Volume III" PDF.
 *
 * Supplies journal entries (armour qualities and flaws) and armour trappings, and defines
 * how text runs of the PDF are classified into [Token]s based on font name and size.
 * No careers are imported from this book.
 */
object ArchivesOfTheEmpire3 : Book, JournalEntrySource, TrappingSource, CareerSource {
    override val name = "Archives of the Empire — Volume III"
    override val tableFootnotesAsNormalText: Boolean = true

    /** Font name suffix shared by the small-caps bold headings and box headers. */
    private const val SMALL_CAPS_BOLD_FONT = "CaslonAntique-Bold-SC700"

    /** This importer extracts no careers; the result is always an empty list. */
    override fun importCareers(document: Document): List<Career> = emptyList()

    /**
     * Imports the "Armour Qualities" and "Armour Flaws" rules from page 36 and files each
     * entry under "The Consumers’ Guide" > "Armour" > (its original last parent).
     */
    override fun importJournalEntries(document: Document): List<JournalEntry> {
        val lexer = TwoColumnPdfLexer(document, this)

        // Lexes the given pages, parses them as rules, keeps only the entries whose parent
        // path is listed in `supportedParents` and prefixes the kept paths with `commonParents`.
        fun parseRules(
            pages: Iterable<Int>,
            excludedTables: Set<String> = emptySet(),
            excludedBoxes: Set<String> = emptySet(),
            commonParents: List<String>,
            supportedParents: List<List<String>>,
            tokenMapper: (Token) -> Token = { it },
        ): List<JournalEntry> {
            val parser =
                RulesParser(
                    excludedBoxes = excludedBoxes,
                    excludedTables = excludedTables,
                )
            val tokens =
                pages.asSequence()
                    .flatMap { page -> lexer.getTokens(page).toList() }
                    .flatten()
                    .map(tokenMapper)

            return parser.import(TokenStream(tokens))
                .filter { entry -> entry.parents in supportedParents }
                .map { entry -> entry.copy(parents = commonParents + entry.parents) }
                .toList()
        }

        return buildList {
            val armourRules =
                parseRules(
                    36..36,
                    commonParents = listOf("The Consumers’ Guide"),
                    supportedParents =
                        listOf(
                            listOf("Armour Rules", "Armour Qualities"),
                            listOf("Armour Rules", "Armour Flaws"),
                        ),
                )

            // The parsed parent path is replaced as a whole; only its last segment is kept.
            addAll(
                armourRules.map { entry ->
                    entry.copy(
                        parents = listOf("The Consumers’ Guide", "Armour", entry.parents.last()),
                    )
                },
            )
        }
    }

    /**
     * Imports armour trappings: the armour table is read from page 37 and the
     * descriptions from page 38, with tokens remapped by [toArmourTableToken].
     */
    override fun importTrappings(document: Document): List<Trapping> {
        val parser =
            ArmourParser(
                document,
                this,
                HeadingDescriptionParser(),
                lexerModifier = { lexer ->
                    object : Lexer {
                        override fun getTokens(page: Int): Sequence<Token> {
                            return lexer.getTokens(page).map { toArmourTableToken(it) }
                        }
                    }
                },
            )

        return parser.parse(37, 38..38)
    }

    /**
     * Re-labels tokens for the armour parser: bold text becomes a table head cell, normal
     * text a body cell part, and level-2 headings box headers. Other tokens pass through.
     */
    private fun toArmourTableToken(token: Token): Token =
        when (token) {
            is Token.BoldPart -> Token.TableHeadCell(token.text, token.metadata)
            is Token.NormalPart ->
                Token.BodyCellPart(
                    text = token.text,
                    metadata = token.metadata,
                )
            is Token.Heading2 -> Token.BoxHeader(token.text, token.metadata)
            else -> token
        }

    /**
     * Two text positions share a style when the inherited rule says so, or when they are
     * the differently sized parts of one level-2 heading.
     */
    override fun areSameStyle(a: TextPosition, b: TextPosition): Boolean =
        super.areSameStyle(a, b) || arePartsOfHeading2(a, b)

    /**
     * Detects a pair of text positions set in the same small-caps bold font where one is
     * 12pt and the other 18pt.
     */
    private fun arePartsOfHeading2(
        a: TextPosition,
        b: TextPosition,
    ): Boolean {
        // Some headings are a mix of 12pt and 18pt font
        val sameHeadingFont =
            a.getFont().getName() == b.getFont().getName() &&
                a.getFont().getName().endsWith(SMALL_CAPS_BOLD_FONT)
        if (!sameHeadingFont) {
            return false
        }

        val smaller = min(a.getFontSizeInPt(), b.getFontSizeInPt())
        val larger = max(a.getFontSizeInPt(), b.getFontSizeInPt())

        return smaller == 12f && larger == 18f
    }

    /**
     * Classifies a text run by font name suffix and font size.
     *
     * Branches are evaluated top to bottom and the first match wins.
     *
     * @return the matching [Token], or `null` when no rule applies to the font/size combination.
     */
    override fun resolveToken(textToken: TextToken): Token? {
        val font = textToken.fontName
        val size = textToken.fontSizePt

        return when {
            // Small-caps bold: 12pt/18pt runs are headings, any other size is a box header.
            font.endsWith(SMALL_CAPS_BOLD_FONT) ->
                if (size == 12f || size == 18f) {
                    Token.Heading2(textToken)
                } else {
                    Token.BoxHeader(textToken)
                }

            font.endsWith("CaslonAntique") && size == 18f -> Token.BoxHeader(textToken)

            font.endsWith("CaslonAntique-Bold") && size == 14f -> Token.TableHeading(textToken)
            font.endsWith("CaslonAntique-Bold") && (size == 19f || size == 22f) ->
                Token.Heading1(textToken)
            font.endsWith("CaslonAntique-Bold") && size == 10f -> Token.TableHeadCell(textToken)

            // The letter "h" in this symbol font is mapped to the cross icon.
            font.endsWith("crossbatstfb") && textToken.text == "h" -> Token.CrossIcon

            size == 12f && font.endsWith("ACaslonPro-Bold") -> Token.Heading3(textToken)

            size == 8f && font.endsWith("ACaslonPro-Regular") ->
                Token.BodyCellPart(
                    text = textToken.text,
                    metadata =
                        Token.Metadata(
                            y = textToken.y,
                            height = textToken.height,
                        ),
                )

            size == 9f && font.endsWith("ACaslonPro-Bold") -> Token.BoldPart(textToken)
            size == 9f && font.endsWith("ACaslonPro-Italic") -> Token.ItalicsPart(textToken)
            size == 9f && font.endsWith("ACaslonPro-Regular") -> Token.NormalPart(textToken)

            else -> null
        }
    }
}

common/src/commonMain/kotlin/cz/frantisekmasa/wfrp_master/common/compendium/domain/importer/parsers/DefaultLayoutPdfLexer.kt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,8 @@ class DefaultLayoutPdfLexer(
1010
private val structure: PdfStructure,
1111
private val mergeSubsequentTokens: Boolean = true,
1212
private val sortTokens: Boolean = false,
13-
) {
14-
fun getTokens(page: Int): Sequence<Token> {
13+
): Lexer {
14+
override fun getTokens(page: Int): Sequence<Token> {
1515
val stripper = TextStripper()
1616
stripper.setSortByPosition(sortTokens && !structure.tokensSorted)
1717
stripper.setStartPage(page)
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
package cz.frantisekmasa.wfrp_master.common.compendium.domain.importer.parsers
2+
3+
/**
 * Produces the [Token]s of a single document page.
 *
 * Declared as a functional (SAM) interface so that simple implementations and decorators
 * can be written as a lambda (`Lexer { page -> ... }`); class and `object : Lexer`
 * implementations keep working unchanged.
 */
fun interface Lexer {
    /**
     * Returns the tokens lexed from the given page.
     *
     * @param page number of the page to read
     * @return the page's tokens as a sequence
     */
    fun getTokens(page: Int): Sequence<Token>
}

common/src/commonMain/kotlin/cz/frantisekmasa/wfrp_master/common/compendium/domain/importer/parsers/trappings/ArmourParser.kt

Lines changed: 20 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ import cz.frantisekmasa.wfrp_master.common.compendium.domain.Trapping
55
import cz.frantisekmasa.wfrp_master.common.compendium.domain.TrappingType
66
import cz.frantisekmasa.wfrp_master.common.compendium.domain.importer.parsers.DefaultLayoutPdfLexer
77
import cz.frantisekmasa.wfrp_master.common.compendium.domain.importer.parsers.Document
8+
import cz.frantisekmasa.wfrp_master.common.compendium.domain.importer.parsers.Lexer
89
import cz.frantisekmasa.wfrp_master.common.compendium.domain.importer.parsers.PdfStructure
910
import cz.frantisekmasa.wfrp_master.common.compendium.domain.importer.parsers.TableParser
1011
import cz.frantisekmasa.wfrp_master.common.compendium.domain.importer.parsers.trappings.description.TrappingDescriptionParser
@@ -19,13 +20,14 @@ class ArmourParser(
1920
private val document: Document,
2021
private val structure: PdfStructure,
2122
private val descriptionParser: TrappingDescriptionParser,
23+
private val lexerModifier: (Lexer) -> Lexer = { it },
2224
) {
2325
fun parse(
2426
tablePage: Int,
2527
descriptionPages: IntRange,
2628
): List<Trapping> {
2729
val parser = TableParser()
28-
val lexer = DefaultLayoutPdfLexer(document, structure, mergeSubsequentTokens = false)
30+
val lexer = lexerModifier(DefaultLayoutPdfLexer(document, structure, mergeSubsequentTokens = false))
2931
val table =
3032
parser.findTables(lexer, structure, tablePage, findNames = true)
3133
.asSequence()
@@ -38,13 +40,13 @@ class ArmourParser(
3840
.filter { it.heading != null }
3941
.flatMap { section ->
4042
val armourType =
41-
matchEnumOrNull<ArmourType>(section.heading!!.replace("*", ""))
43+
matchEnumOrNull<ArmourType>(normalizeName(section.heading!!.replace("*", "")))
4244
?: error("Invalid armour type ${section.heading}")
4345

4446
section.rows.map { row ->
4547
val price = PriceParser.parse(row[1])
4648
val penalty = row[4].trim()
47-
val name = row[0].trim()
49+
val name = normalizeName(row[0].trim())
4850
val comparableName = descriptionParser.comparableName(name)
4951

5052
val footnoteNumbers =
@@ -65,13 +67,13 @@ class ArmourParser(
6567
TrappingType.Armour(
6668
type = armourType,
6769
locations = locations(row[5]),
68-
points = ArmourPoints(row[6].toInt()),
70+
points = ArmourPoints(optionalValue(row[6])?.toInt() ?: 0),
6971
qualities = parseFeatures(row[7]),
7072
flaws = parseFeatures(row[7]),
7173
),
7274
description =
7375
buildString {
74-
if (penalty != "" && penalty != "") {
76+
if(optionalValue(penalty) != null) {
7577
append("**Penalty**: $penalty\n")
7678
}
7779

@@ -115,4 +117,17 @@ class ArmourParser(
115117
.filter { location -> location.name.equals(it, ignoreCase = true) }
116118
}.toSet()
117119
}
120+
121+
/**
 * Returns [value] unchanged when the table cell holds real content, or `null` when the
 * cell is empty.
 *
 * A cell counts as empty when, after trimming whitespace, it is blank or consists only of
 * a dash placeholder: either ASCII hyphen-minus or an en dash (U+2013).
 */
private fun optionalValue(value: String): String? {
    // Archives of the Empire 3 uses "–" (en dash, U+2013) for empty cells.
    // The escape is used so the check does not depend on the file's encoding.
    val trimmed = value.trim()
    val isPlaceholder = trimmed.isEmpty() || trimmed == "-" || trimmed == "\u2013"

    return if (isPlaceholder) null else value
}
125+
126+
/**
 * Rewrites book-specific spellings in [name] to the spellings used by the rest of the
 * importer. Replacements are case-insensitive and applied in the listed order.
 */
private fun normalizeName(name: String): String {
    val replacements =
        listOf(
            // Archives of the Empire 3 uses "Chainmail" instead of "Mail"
            "Chainmail" to "Mail",
            // Archives of the Empire 3 uses plural in Armour table
            "Soft Kits" to "Soft Kit",
        )

    return replacements.fold(name) { current, (bookSpelling, canonical) ->
        current.replace(bookSpelling, canonical, ignoreCase = true)
    }
}
118133
}

common/src/commonMain/kotlin/cz/frantisekmasa/wfrp_master/common/compendium/domain/importer/parsers/trappings/Utils.kt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ inline fun <reified T : Enum<T>> matchEnumOrNull(
1010

1111
val comparableValue =
1212
value
13+
.trim()
1314
.replace('-', '_')
1415
.replace(' ', '_')
1516

common/src/jvmTest/kotlin/CompendiumImportRegressionTest.kt

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import cz.frantisekmasa.wfrp_master.common.compendium.domain.importer.PdfCompendiumImporter
22
import cz.frantisekmasa.wfrp_master.common.compendium.domain.importer.books.ArchivesOfTheEmpire1
33
import cz.frantisekmasa.wfrp_master.common.compendium.domain.importer.books.ArchivesOfTheEmpire2
4+
import cz.frantisekmasa.wfrp_master.common.compendium.domain.importer.books.ArchivesOfTheEmpire3
45
import cz.frantisekmasa.wfrp_master.common.compendium.domain.importer.books.Book
56
import cz.frantisekmasa.wfrp_master.common.compendium.domain.importer.books.CoreRulebook
67
import cz.frantisekmasa.wfrp_master.common.compendium.domain.importer.books.EnemyInShadowsCompanion
@@ -64,6 +65,11 @@ class CompendiumImportRegressionTest {
6465
assertMatchesPreviousRuns(ArchivesOfTheEmpire2)
6566
}
6667

68+
@Test
69+
fun `Archives of The Empire III`() {
70+
assertMatchesPreviousRuns(ArchivesOfTheEmpire3)
71+
}
72+
6773
@Test
6874
fun `Sea of Claws`() {
6975
assertMatchesPreviousRuns(SeaOfClaws)

common/src/jvmTest/kotlin/ConfigProvider.kt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import cz.frantisekmasa.wfrp_master.common.compendium.domain.importer.books.ArchivesOfTheEmpire1
22
import cz.frantisekmasa.wfrp_master.common.compendium.domain.importer.books.ArchivesOfTheEmpire2
3+
import cz.frantisekmasa.wfrp_master.common.compendium.domain.importer.books.ArchivesOfTheEmpire3
34
import cz.frantisekmasa.wfrp_master.common.compendium.domain.importer.books.Book
45
import cz.frantisekmasa.wfrp_master.common.compendium.domain.importer.books.CoreRulebook
56
import cz.frantisekmasa.wfrp_master.common.compendium.domain.importer.books.EnemyInShadowsCompanion
@@ -34,6 +35,7 @@ object ConfigProvider {
3435
EnemyInShadowsCompanion -> "enemy_in_shadows_companion.pdf"
3536
ArchivesOfTheEmpire1 -> "archives_of_the_empire_1.pdf"
3637
ArchivesOfTheEmpire2 -> "archives_of_the_empire_2.pdf"
38+
ArchivesOfTheEmpire3 -> "archives_of_the_empire_3.pdf"
3739
SeaOfClaws -> "sea_of_claws.pdf"
3840
else -> null
3941
}

0 commit comments

Comments
 (0)