1+ package cz.frantisekmasa.wfrp_master.common.compendium.domain.importer.books
2+
3+ import cz.frantisekmasa.wfrp_master.common.compendium.domain.Career
4+ import cz.frantisekmasa.wfrp_master.common.compendium.domain.JournalEntry
5+ import cz.frantisekmasa.wfrp_master.common.compendium.domain.Trapping
6+ import cz.frantisekmasa.wfrp_master.common.compendium.domain.importer.parsers.Document
7+ import cz.frantisekmasa.wfrp_master.common.compendium.domain.importer.parsers.Lexer
8+ import cz.frantisekmasa.wfrp_master.common.compendium.domain.importer.parsers.RulesParser
9+ import cz.frantisekmasa.wfrp_master.common.compendium.domain.importer.parsers.TextPosition
10+ import cz.frantisekmasa.wfrp_master.common.compendium.domain.importer.parsers.TextToken
11+ import cz.frantisekmasa.wfrp_master.common.compendium.domain.importer.parsers.Token
12+ import cz.frantisekmasa.wfrp_master.common.compendium.domain.importer.parsers.TokenStream
13+ import cz.frantisekmasa.wfrp_master.common.compendium.domain.importer.parsers.TwoColumnPdfLexer
14+ import cz.frantisekmasa.wfrp_master.common.compendium.domain.importer.parsers.trappings.ArmourParser
15+ import cz.frantisekmasa.wfrp_master.common.compendium.domain.importer.parsers.trappings.description.HeadingDescriptionParser
16+ import cz.frantisekmasa.wfrp_master.common.compendium.domain.importer.sources.CareerSource
17+ import cz.frantisekmasa.wfrp_master.common.compendium.domain.importer.sources.JournalEntrySource
18+ import cz.frantisekmasa.wfrp_master.common.compendium.domain.importer.sources.TrappingSource
19+ import kotlin.math.max
20+ import kotlin.math.min
21+
/**
 * Compendium importer definition for the book named by [name].
 *
 * The object acts as the book/style resolver handed to the PDF lexers and parsers
 * (it is passed as `this` to [TwoColumnPdfLexer] and [ArmourParser]) and exposes three
 * import entry points: careers (none), journal entries (armour qualities and flaws)
 * and trappings (armour).
 *
 * NOTE(review): every string literal below starts with a space, exactly as found in the
 * reviewed source. If that is a copy/paste artifact, the `endsWith` font checks and the
 * heading names would need the space removed — confirm against the repository.
 */
object ArchivesOfTheEmpire3 : Book, JournalEntrySource, TrappingSource, CareerSource {
    override val name = " Archives of the Empire — Volume III"
    override val tableFootnotesAsNormalText: Boolean = true

    /** Font-name suffix shared by the level-2 heading detection in style merging and token resolution. */
    private const val HEADING2_FONT_SUFFIX = " CaslonAntique-Bold-SC700"

    /** No careers are imported from this book; the result is always empty. */
    override fun importCareers(document: Document): List<Career> = emptyList()

    /**
     * Parses the rules on page 36 and keeps only the entries filed under
     * "Armour Rules" → "Armour Qualities" / "Armour Flaws".
     *
     * Each kept entry is re-parented to `[The Consumers’ Guide, Armour, <last original parent>]`.
     *
     * @param document source document handed to the two-column lexer
     * @return the re-parented journal entries, in parser order
     */
    override fun importJournalEntries(document: Document): List<JournalEntry> {
        val lexer = TwoColumnPdfLexer(document, this)
        val sectionRoot = " The Consumers’ Guide"
        val acceptedParents =
            listOf(
                listOf(" Armour Rules", " Armour Qualities"),
                listOf(" Armour Rules", " Armour Flaws"),
            )

        // The lexer result for a page is converted to a list of token sequences,
        // which are then concatenated into a single lazy stream.
        val tokens =
            (36..36).asSequence()
                .flatMap { page -> lexer.getTokens(page).toList() }
                .flatten()

        val parsedEntries =
            RulesParser(
                excludedBoxes = emptySet<String>(),
                excludedTables = emptySet<String>(),
            ).import(TokenStream(tokens))

        return parsedEntries
            .filter { entry -> entry.parents in acceptedParents }
            .map { entry ->
                // Accepted entries always have two parents, so last() is safe here.
                entry.copy(parents = listOf(sectionRoot, " Armour", entry.parents.last()))
            }
            .toList()
    }

    /**
     * Imports armour trappings via [ArmourParser].
     *
     * The parser's lexer is wrapped so that bold text is emitted as table head cells,
     * normal text as table body cell parts and level-2 headings as box headers; all other
     * tokens pass through untouched.
     *
     * @param document source document handed to the armour parser
     * @return whatever `ArmourParser.parse(37, 38..38)` yields — presumably table page 37
     *   and description page 38; confirm against the parser's signature
     */
    override fun importTrappings(document: Document): List<Trapping> {
        val armourParser =
            ArmourParser(
                document,
                this,
                HeadingDescriptionParser(),
                lexerModifier = { wrapped ->
                    object : Lexer {
                        override fun getTokens(page: Int): Sequence<Token> =
                            wrapped.getTokens(page).map { token ->
                                when (token) {
                                    is Token.BoldPart -> Token.TableHeadCell(token.text, token.metadata)
                                    is Token.NormalPart ->
                                        Token.BodyCellPart(
                                            text = token.text,
                                            metadata = token.metadata,
                                        )
                                    is Token.Heading2 -> Token.BoxHeader(token.text, token.metadata)
                                    else -> token
                                }
                            }
                    }
                },
            )

        return armourParser.parse(37, 38..38)
    }

    /**
     * Two text positions share a style when the inherited check says so, or when they are
     * the 12pt and 18pt halves of one level-2 heading.
     */
    override fun areSameStyle(a: TextPosition, b: TextPosition): Boolean =
        super.areSameStyle(a, b) || arePartsOfHeading2(a, b)

    /**
     * Detects a heading set in a mix of 12pt and 18pt glyphs of the same heading font.
     *
     * @return `true` only when both positions use the identical font name, that name ends with
     *   the heading suffix, and the two sizes are exactly 12pt and 18pt (in either order)
     */
    private fun arePartsOfHeading2(
        a: TextPosition,
        b: TextPosition,
    ): Boolean {
        val fontName = a.getFont().getName()
        if (fontName != b.getFont().getName()) return false
        if (!fontName.endsWith(HEADING2_FONT_SUFFIX)) return false

        val sizeA = a.getFontSizeInPt()
        val sizeB = b.getFontSizeInPt()
        return min(sizeA, sizeB) == 12f && max(sizeA, sizeB) == 18f
    }

    /**
     * Maps a raw text run to a semantic [Token] based on its font name suffix and point size.
     *
     * Rules are evaluated top to bottom and the first match wins.
     *
     * @param textToken the text run to classify
     * @return the matching token, or `null` when no rule applies
     */
    override fun resolveToken(textToken: TextToken): Token? {
        val font = textToken.fontName
        val size = textToken.fontSizePt
        val isCaslonAntiqueBold = font.endsWith(" CaslonAntique-Bold")

        return when {
            // Heading font: 12pt/18pt are headings, any other size is a box header.
            font.endsWith(HEADING2_FONT_SUFFIX) ->
                if (size == 12f || size == 18f) Token.Heading2(textToken) else Token.BoxHeader(textToken)

            font.endsWith(" CaslonAntique") && size == 18f -> Token.BoxHeader(textToken)

            isCaslonAntiqueBold && size == 14f -> Token.TableHeading(textToken)
            isCaslonAntiqueBold && (size == 19f || size == 22f) -> Token.Heading1(textToken)
            isCaslonAntiqueBold && size == 10f -> Token.TableHeadCell(textToken)

            font.endsWith(" crossbatstfb") && textToken.text == " h" -> Token.CrossIcon

            size == 12f && font.endsWith(" ACaslonPro-Bold") -> Token.Heading3(textToken)

            size == 8f && font.endsWith(" ACaslonPro-Regular") ->
                Token.BodyCellPart(
                    text = textToken.text,
                    metadata =
                        Token.Metadata(
                            y = textToken.y,
                            height = textToken.height,
                        ),
                )

            // 9pt body text, distinguished by weight/slant.
            size == 9f && font.endsWith(" ACaslonPro-Bold") -> Token.BoldPart(textToken)
            size == 9f && font.endsWith(" ACaslonPro-Italic") -> Token.ItalicsPart(textToken)
            size == 9f && font.endsWith(" ACaslonPro-Regular") -> Token.NormalPart(textToken)

            else -> null
        }
    }
}
0 commit comments