Skip to content

Commit 96dac68

Browse files
committed
Parse series info from PDF and CBZ files
1 parent 0a0ceda commit 96dac68

2 files changed

Lines changed: 101 additions & 19 deletions

File tree

comic-book.js

Lines changed: 69 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,55 @@
1+
// Read metadata from the archive's ComicInfo.xml entry, if present.
// Spec: https://anansi-project.github.io/docs/comicinfo/intro
//
// Parameters (destructured from the archive loader):
//   entries  - array of archive entries, each with a `filename` string
//   loadBlob - async function taking a filename and resolving to a Blob
//
// Resolves to `null` when there is no ComicInfo.xml entry, or when it cannot
// be loaded or parsed. Otherwise resolves to an object holding ONLY the fields
// that have a non-empty value in the document. Absent fields are omitted
// rather than set to `undefined`, because object spread copies
// `undefined`-valued properties: with `{ ...fallback, ...result }` an explicit
// `undefined` here would erase the fallback's value for that field.
const readComicInfoXML = async ({ entries, loadBlob }) => {
    const entry = entries.find(e => e.filename?.toLowerCase() === 'comicinfo.xml')
    if (!entry) return null
    let doc
    try {
        const text = await (await loadBlob(entry.filename)).text()
        doc = new DOMParser().parseFromString(text, 'application/xml')
    } catch {
        // Best effort: unreadable metadata is treated the same as none.
        return null
    }
    // Malformed XML is reported through a <parsererror> element, not a throw.
    if (!doc || doc.getElementsByTagName('parsererror').length) return null
    // Trimmed text of the first element with this tag name; empty counts as absent.
    const get = name => doc.getElementsByTagName(name).item(0)?.textContent?.trim() || undefined
    const fields = {
        title: get('Title'),
        publisher: get('Publisher'),
        language: get('LanguageISO'),
        author: get('Writer'),
        series: get('Series'),
        seriesPosition: get('Number'),
        seriesTotal: get('Count'),
    }
    return Object.fromEntries(
        Object.entries(fields).filter(([, value]) => value !== undefined))
}
30+
31+
// Read metadata from a ComicBookInfo JSON document stored in the zip comment.
//
// `getComment` is an async function resolving to the archive comment text.
// Resolves to `null` when the comment is empty, is not valid JSON, or has no
// 'ComicBookInfo/1.0' object. Otherwise resolves to:
//   title, publisher, language, series - copied from the document as-is
//   author         - credits joined as "Person (Role), ..."; '' when none
//   published      - "YYYY-MM", or undefined unless both a year and a valid
//                    month (integer 1-12) are present
//   seriesPosition - the issue as a string (issue 0 is kept), or undefined
const readComicBookInfo = async ({ getComment }) => {
    let info
    try {
        info = JSON.parse(await getComment() || '')['ComicBookInfo/1.0']
    } catch {
        return null
    }
    if (!info || typeof info !== 'object') return null
    const year = info.publicationYear
    const month = Number(info.publicationMonth)
    const mm = Number.isInteger(month) && month >= 1 && month <= 12
        ? String(month).padStart(2, '0') : null
    // Tolerate malformed credits instead of throwing outside the try above,
    // which would abort opening the book over optional metadata.
    const credits = Array.isArray(info.credits) ? info.credits : []
    const author = credits
        .filter(credit => credit?.person)
        .map(({ person, role }) => role ? `${person} (${role})` : `${person}`)
        .join(', ')
    return {
        title: info.title,
        publisher: info.publisher,
        language: info.language || info.lang,
        author,
        // Gate on the validated month so an out-of-range value cannot
        // produce a string like "2020-null".
        published: year && mm ? `${year}-${mm}` : undefined,
        series: info.series,
        seriesPosition: info.issue == null ? undefined : String(info.issue),
    }
}
52+
153
export const makeComicBook = async ({ entries, loadBlob, getSize, getComment }, file) => {
254
const cache = new Map()
355
const urls = new Map()
@@ -24,25 +76,23 @@ export const makeComicBook = async ({ entries, loadBlob, getSize, getComment },
2476
if (!files.length) throw new Error('No supported image files in archive')
2577

2678
const book = {}
27-
try {
28-
const jsonComment = JSON.parse(await getComment() || '')
29-
const info = jsonComment['ComicBookInfo/1.0']
30-
if (info) {
31-
const year = info.publicationYear
32-
const month = info.publicationMonth
33-
const mm = month && month >= 1 && month <= 12 ? String(month).padStart(2, '0') : null
34-
book.metadata = {
35-
title: info.title || file.name,
36-
publisher: info.publisher,
37-
language: info.language || info.lang,
38-
author: info.credits ? info.credits.map(c => `${c.person} (${c.role})`).join(', ') : '',
39-
published: year && month ? `${year}-${mm}` : undefined,
40-
}
41-
} else {
42-
book.metadata = { title: file.name }
43-
}
44-
} catch {
45-
book.metadata = { title: file.name }
79+
// Prefer ComicInfo.xml (Anansi standard) over ComicBookInfo (JSON in zip comment).
80+
// Fields missing from the preferred source fall through to the secondary one.
81+
const xml = await readComicInfoXML({ entries, loadBlob })
82+
const cbi = await readComicBookInfo({ getComment })
83+
const merged = { ...(cbi || {}), ...(xml || {}) }
84+
book.metadata = {
85+
title: merged.title || file.name,
86+
publisher: merged.publisher,
87+
language: merged.language,
88+
author: merged.author,
89+
published: merged.published,
90+
}
91+
if (merged.series) {
92+
const series = { name: merged.series }
93+
if (merged.seriesPosition) series.position = merged.seriesPosition
94+
if (merged.seriesTotal) series.total = merged.seriesTotal
95+
book.metadata.belongsTo = { series }
4696
}
4797
book.getCover = () => loadBlob(files[0])
4898
book.sections = files.map(name => ({

pdf.js

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -316,6 +316,35 @@ const makeTOCItem = async (item, pdf) => {
316316

317317
const MAX_CACHED_PAGES = 8
318318

319+
const CALIBRE_NS = 'http://calibre-ebook.com/xmp-namespace'
320+
const CALIBRE_SI_NS = 'http://calibre-ebook.com/xmp-namespace-series-index'
321+
const RDF_NS = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'
322+
323+
// Extract the series that Calibre embeds in a PDF's XMP packet. Calibre
// serializes it as:
//   <calibre:series rdf:parseType="Resource">
//     <rdf:value>Name</rdf:value>
//     <calibreSI:series_index>1.00</calibreSI:series_index>
//   </calibre:series>
//
// `raw` is the XMP packet as a string. Returns `{ name }` or
// `{ name, position }` (both trimmed strings), or `null` when `raw` is not a
// non-empty string, is not well-formed XML, or carries no named series.
const parseCalibreSeriesFromXMP = raw => {
    if (typeof raw !== 'string' || !raw) return null

    let xmp = null
    try {
        xmp = new DOMParser().parseFromString(raw, 'application/xml')
    } catch {
        return null
    }
    if (!xmp) return null
    // Malformed XML is reported through a <parsererror> element, not a throw.
    if (xmp.getElementsByTagName('parsererror').length) return null

    const series = xmp.getElementsByTagNameNS(CALIBRE_NS, 'series').item(0)
    if (!series) return null

    // Trimmed text of the first matching descendant of the series element.
    const textOf = (namespace, localName) =>
        series.getElementsByTagNameNS(namespace, localName).item(0)?.textContent?.trim()

    const name = textOf(RDF_NS, 'value')
    if (!name) return null

    const position = textOf(CALIBRE_SI_NS, 'series_index')
    if (!position) return { name }
    return { name, position }
}
347+
319348
export const makePDF = async file => {
320349
const transport = new pdfjsLib.PDFDataRangeTransport(file.size, [])
321350
transport.requestDataRange = (begin, end) => {
@@ -354,6 +383,9 @@ export const makePDF = async file => {
354383
rights: metadata?.get('dc:rights'),
355384
}
356385

386+
const calibreSeries = parseCalibreSeriesFromXMP(metadata?.getRaw?.())
387+
if (calibreSeries) book.metadata.belongsTo = { series: calibreSeries }
388+
357389
const outline = await pdf.getOutline()
358390
book.toc = outline ? await Promise.all(outline.map(item => makeTOCItem(item, pdf))) : null
359391

0 commit comments

Comments
 (0)