Skip to content

Commit 07216a6

Browse files
committed
filter by doc type
1 parent 4218a15 commit 07216a6

8 files changed

Lines changed: 269 additions & 109 deletions

File tree

analysis/src/java/com/github/oeuvres/alix/ingest/AlixSaxHandler.java

Lines changed: 13 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -200,7 +200,7 @@ public void startElement(String uri, String localName, String qName, Attributes
200200
switch (localName) {
201201
case "set" -> startSet();
202202
case "book" -> startBook(atts);
203-
case "document", "article" -> startAtomicDocument(atts, localName);
203+
case "document", "article" -> startArticle(atts, localName);
204204
case "chapter" -> startChapter(atts);
205205
case "field" -> startField(atts);
206206
default -> throw new SAXException("Unsupported alix element: " + nameForTag(qName, localName));
@@ -243,7 +243,7 @@ public void endElement(String uri, String localName, String qName) throws SAXExc
243243
switch (localName) {
244244
case "field" -> endField();
245245
case "chapter" -> endChapter();
246-
case "document", "article" -> endAtomicDocument(localName);
246+
case "document", "article" -> endArticle(localName);
247247
case "book" -> endBook();
248248
case "set" -> endSet();
249249
default -> {
@@ -451,7 +451,8 @@ private void startBook(Attributes atts) throws SAXException
451451
openDoc = OpenDoc.BOOK;
452452
scopes.push(Scope.BOOK);
453453

454-
addSyntheticField(ALIX_FILESTEM, AlixDocument.FieldType.CATEGORY, fileStem);
454+
addMetaField(ALIX_FILESTEM, AlixDocument.FieldType.CATEGORY, fileStem);
455+
addMetaField(ALIX_TYPE, AlixDocument.FieldType.CATEGORY, BOOK);
455456
}
456457

457458
/**
@@ -483,7 +484,7 @@ private void endBook() throws SAXException
483484
* @param atts SAX attributes (must contain {@code xml:id})
484485
* @param eltName {@code "document"} or {@code "article"} (for error messages)
485486
*/
486-
private void startAtomicDocument(Attributes atts, String eltName) throws SAXException
487+
private void startArticle(Attributes atts, String eltName) throws SAXException
487488
{
488489
ensureNoField("alix:" + eltName);
489490
Scope parent = currentScope();
@@ -496,10 +497,11 @@ private void startAtomicDocument(Attributes atts, String eltName) throws SAXExce
496497
openDoc = OpenDoc.DOCUMENT;
497498
scopes.push(Scope.DOCUMENT);
498499

499-
addSyntheticField(ALIX_FILESTEM, AlixDocument.FieldType.CATEGORY, fileStem);
500+
addMetaField(ALIX_FILESTEM, AlixDocument.FieldType.CATEGORY, fileStem);
501+
addMetaField(ALIX_TYPE, AlixDocument.FieldType.CATEGORY, ARTICLE);
500502
}
501503

502-
private void endAtomicDocument(String eltName) throws SAXException
504+
private void endArticle(String eltName) throws SAXException
503505
{
504506
ensureNoField("</alix:" + eltName + ">");
505507
ensureScope(Scope.DOCUMENT, "</alix:" + eltName + ">");
@@ -555,9 +557,10 @@ private void startChapter(Attributes atts) throws SAXException
555557
openDoc = OpenDoc.CHAPTER;
556558
scopes.push(Scope.CHAPTER);
557559

558-
addSyntheticField(ALIX_FILESTEM, AlixDocument.FieldType.CATEGORY, fileStem);
559-
addSyntheticField(ALIX_BOOKID, AlixDocument.FieldType.CATEGORY, currentBookId);
560-
addSyntheticField(ALIX_ORD, AlixDocument.FieldType.INT, Integer.toString(chapterOrd));
560+
addMetaField(ALIX_FILESTEM, AlixDocument.FieldType.CATEGORY, fileStem);
561+
addMetaField(ALIX_TYPE, AlixDocument.FieldType.CATEGORY, CHAPTER);
562+
addMetaField(ALIX_BOOKID, AlixDocument.FieldType.CATEGORY, currentBookId);
563+
addMetaField(ALIX_ORD, AlixDocument.FieldType.INT, Integer.toString(chapterOrd));
561564
}
562565

563566
private void endChapter() throws SAXException
@@ -660,7 +663,7 @@ private void endField() throws SAXException
660663
* @param type field type
661664
* @param value field value (as text)
662665
*/
663-
private void addSyntheticField(String name, AlixDocument.FieldType type, String value)
666+
private void addMetaField(String name, AlixDocument.FieldType type, String value)
664667
{
665668
doc.openField(name, type, null);
666669
doc.fieldText(value);

analysis/src/resources/com/github/oeuvres/alix/xml/alix.xsl

Lines changed: 12 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -234,24 +234,22 @@
234234
<!-- Do not copy tags for book field cover -->
235235
<xsl:copy-of select="$tags"/>
236236
<alix:field name="content" type="text">
237-
<article>
237+
<article class="{$doctype}">
238238
<xsl:choose>
239239
<xsl:when test="/*/tei:text/tei:front | /*/tei:text/tei:back">
240-
<xsl:apply-templates
241-
select="/*/tei:text/tei:front | /*/tei:text/tei:body | /*/tei:text/tei:back"/>
240+
<xsl:call-template name="div-header">
241+
<xsl:with-param name="tei" select="/*/tei:text/tei:front | /*/tei:text/tei:body | /*/tei:text/tei:back"/>
242+
</xsl:call-template>
242243
</xsl:when>
243244
<xsl:otherwise>
244-
<xsl:apply-templates select="/*/tei:text/tei:body/node()"/>
245+
<xsl:call-template name="div-header">
246+
<xsl:with-param name="tei" select="/*/tei:text/tei:body/node()"/>
247+
</xsl:call-template>
245248
</xsl:otherwise>
246249
</xsl:choose>
247-
<xsl:variable name="notes">
248-
<xsl:for-each select="/*/tei:text">
249-
<xsl:call-template name="footnotes"/>
250-
</xsl:for-each>
251-
</xsl:variable>
252-
<xsl:if test="$notes != ''">
253-
<xsl:copy-of select="$notes"/>
254-
</xsl:if>
250+
<xsl:for-each select="/*/tei:text">
251+
<xsl:call-template name="footnotes"/>
252+
</xsl:for-each>
255253
</article>
256254
</alix:field>
257255
</xsl:otherwise>
@@ -419,7 +417,7 @@
419417
<xsl:call-template name="toclocal"/>
420418
</alix:field>
421419
<alix:field name="content" type="text">
422-
<article>
420+
<article class="chapter">
423421
<xsl:choose>
424422
<xsl:when test="descendant::*[key('split', generate-id())]">
425423
<!-- take content before sections -->
@@ -439,16 +437,13 @@
439437
)[1]"/>
440438
<xsl:call-template name="div-header">
441439
<xsl:with-param name="tei" select="$first/preceding-sibling::node()"/>
442-
<xsl:with-param name="level" select="1"/>
443440
</xsl:call-template>
444441
<xsl:call-template name="footnotes">
445442
<xsl:with-param name="tei" select="$first/preceding-sibling::node()"/>
446443
</xsl:call-template>
447444
</xsl:when>
448445
<xsl:otherwise>
449-
<xsl:call-template name="div-header">
450-
<xsl:with-param name="level" select="1"/>
451-
</xsl:call-template>
446+
<xsl:call-template name="div-header"/>
452447
<xsl:call-template name="footnotes"/>
453448
</xsl:otherwise>
454449
</xsl:choose>

analysis/src/resources/com/github/oeuvres/alix/xml/tei_common.xsl

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1792,4 +1792,68 @@ dégrossi le travail, mais du reste à faire -->
17921792
</xsl:if>
17931793
</xsl:template>
17941794

1795+
1796+
<xsl:template name="truncate">
1797+
<xsl:param name="string"/>
1798+
<xsl:param name="len" select="150"/>
1799+
<xsl:param name="ellipsis">…</xsl:param>
1800+
<xsl:choose>
1801+
<xsl:when test="string-length($string) &lt;= $len">
1802+
<xsl:value-of select="$string"/>
1803+
</xsl:when>
1804+
<xsl:otherwise>
1805+
<xsl:variable name="substring" select="substring($string, 1, $len)"/>
1806+
<xsl:variable name="before-last-space">
1807+
<xsl:variable name="after-last-space">
1808+
<xsl:call-template name="last-token">
1809+
<xsl:with-param name="string" select="$substring"/>
1810+
<xsl:with-param name="sep" select="' '"/>
1811+
</xsl:call-template>
1812+
</xsl:variable>
1813+
<xsl:value-of select="substring($substring, 1,
1814+
string-length($substring) - string-length($after-last-space) - 1)"/>
1815+
</xsl:variable>
1816+
<xsl:call-template name="rtrim-nonletter">
1817+
<xsl:with-param name="string" select="$before-last-space"/>
1818+
</xsl:call-template>
1819+
<xsl:value-of select="$ellipsis"/>
1820+
</xsl:otherwise>
1821+
</xsl:choose>
1822+
</xsl:template>
1823+
1824+
1825+
<xsl:template name="last-token">
1826+
<xsl:param name="string"/>
1827+
<xsl:param name="sep"/>
1828+
<xsl:choose>
1829+
<xsl:when test="contains($string, $sep)">
1830+
<xsl:call-template name="last-token">
1831+
<xsl:with-param name="string" select="substring-after($string, $sep)"/>
1832+
<xsl:with-param name="sep" select="$sep"/>
1833+
</xsl:call-template>
1834+
</xsl:when>
1835+
<xsl:otherwise>
1836+
<xsl:value-of select="$string"/>
1837+
</xsl:otherwise>
1838+
</xsl:choose>
1839+
</xsl:template>
1840+
1841+
1842+
<xsl:template name="rtrim-nonletter">
1843+
<xsl:param name="string"/>
1844+
<xsl:variable name="last" select="substring($string, string-length($string), 1)"/>
1845+
<xsl:choose>
1846+
<xsl:when test="string-length($string) = 0">
1847+
</xsl:when>
1848+
<xsl:when test="contains('  ,.;:!?—–«»&quot;-', $last)">
1849+
<xsl:call-template name="rtrim-nonletter">
1850+
<xsl:with-param name="string" select="substring($string, 1, string-length($string) - 1)"/>
1851+
</xsl:call-template>
1852+
</xsl:when>
1853+
<xsl:otherwise>
1854+
<xsl:value-of select="$string"/>
1855+
</xsl:otherwise>
1856+
</xsl:choose>
1857+
</xsl:template>
1858+
17951859
</xsl:transform>

analysis/src/resources/com/github/oeuvres/alix/xml/tei_html/tei_flow_html.xsl

Lines changed: 74 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -165,11 +165,13 @@ Sections
165165
Sections: group the opening information in a <header> element
166166
-->
167167
<xsl:template name="div-header">
168-
<xsl:param name="level"/>
168+
<xsl:param name="level" select="1"/>
169169
<xsl:param name="from"/>
170170
<xsl:param name="tei" select="node()"/>
171171
<xsl:variable name="first" select="
172-
($tei[not(self::tei:argument)]
172+
($tei
173+
[not(self::text())]
174+
[not(self::tei:argument)]
173175
[not(self::tei:byline)]
174176
[not(self::tei:cb)]
175177
[not(self::tei:dateline)]
@@ -187,24 +189,26 @@ Sections
187189
<xsl:choose>
188190
<!-- the opener plays the role of header -->
189191
<xsl:when test="$tei[self::tei:opener]">
190-
<xsl:apply-templates select="$tei">
192+
<xsl:apply-templates select="$tei/tei:opener">
191193
<xsl:with-param name="level" select="$level"/>
192194
<xsl:with-param name="from" select="$from"/>
193195
</xsl:apply-templates>
196+
<div class="body">
197+
<xsl:apply-templates select="$tei">
198+
<xsl:with-param name="level" select="$level"/>
199+
<xsl:with-param name="from" select="$from"/>
200+
</xsl:apply-templates>
201+
</div>
202+
</xsl:when>
203+
<xsl:when test="not($first)">
204+
<div class="body">
205+
<xsl:apply-templates select="$tei">
206+
<xsl:with-param name="level" select="$level"/>
207+
<xsl:with-param name="from" select="$from"/>
208+
</xsl:apply-templates>
209+
</div>
194210
</xsl:when>
195-
<!-- Candidates for section title -->
196-
<xsl:when test="$first and
197-
$first/preceding-sibling::*[
198-
self::tei:argument
199-
or self::tei:byline
200-
or self::tei:dateline
201-
or self::tei:docAuthor
202-
or self::tei:docDate
203-
or self::tei:epigraph
204-
or self::tei:head
205-
or self::tei:salute
206-
or self::tei:signed
207-
]">
211+
<xsl:otherwise>
208212
<header>
209213
<xsl:apply-templates select="$first/preceding-sibling::node()">
210214
<xsl:with-param name="level" select="$level"/>
@@ -217,12 +221,6 @@ Sections
217221
<xsl:with-param name="from" select="$from"/>
218222
</xsl:apply-templates>
219223
</div>
220-
</xsl:when>
221-
<xsl:otherwise>
222-
<xsl:apply-templates select="$tei">
223-
<xsl:with-param name="level" select="$level"/>
224-
<xsl:with-param name="from" select="$from"/>
225-
</xsl:apply-templates>
226224
</xsl:otherwise>
227225
</xsl:choose>
228226
</xsl:template>
@@ -2227,6 +2225,60 @@ Elements block or inline level
22272225
</xsl:otherwise>
22282226
</xsl:choose>
22292227
</xsl:template>
2228+
2229+
<xsl:template match="*" mode="byline">
2230+
<xsl:message>
2231+
<xsl:value-of select="name()"/>
2232+
<xsl:text> not implemented for mode="byline"</xsl:text>
2233+
</xsl:message>
2234+
</xsl:template>
2235+
2236+
<xsl:template match="tei:biblStruct" mode="byline">
2237+
<xsl:choose>
2238+
<xsl:when test="tei:analytic">
2239+
<xsl:for-each select="tei:analytic/tei:editor
2240+
| tei:analytic/tei:author">
2241+
<xsl:apply-templates select="."/>
2242+
<xsl:choose>
2243+
<xsl:when test="position() = last()"/>
2244+
<xsl:otherwise>, </xsl:otherwise>
2245+
</xsl:choose>
2246+
</xsl:for-each>
2247+
</xsl:when>
2248+
<xsl:when test="tei:monogr">
2249+
<xsl:for-each select="tei:monogr/tei:editor
2250+
| tei:monogr/tei:author">
2251+
<xsl:apply-templates select="."/>
2252+
<xsl:choose>
2253+
<xsl:when test="position() = last()"/>
2254+
<xsl:otherwise>, </xsl:otherwise>
2255+
</xsl:choose>
2256+
</xsl:for-each>
2257+
</xsl:when>
2258+
</xsl:choose>
2259+
</xsl:template>
2260+
2261+
<xsl:template match="tei:biblStruct" mode="year">
2262+
<xsl:variable name="year">
2263+
<xsl:choose>
2264+
<xsl:when test="tei:analytic/tei:date">
2265+
<xsl:apply-templates mode="year" select="tei:analytic/tei:date"/>
2266+
</xsl:when>
2267+
<xsl:when test="tei:monogr/tei:imprint/tei:date">
2268+
<xsl:apply-templates mode="year" select="tei:monogr/tei:imprint/tei:date"/>
2269+
</xsl:when>
2270+
</xsl:choose>
2271+
</xsl:variable>
2272+
<xsl:choose>
2273+
<xsl:when test="$year != ''">
2274+
<time class="year">
2275+
<xsl:text>(</xsl:text>
2276+
<xsl:value-of select="$year"/>
2277+
<xsl:text>)</xsl:text>
2278+
</time>
2279+
</xsl:when>
2280+
</xsl:choose>
2281+
</xsl:template>
22302282
<!--
22312283
<h3>Indexables</h3>
22322284
-->

0 commit comments

Comments
 (0)