Skip to content

Commit fdae826

Browse files
committed
Added support for epub3 collections metadata
1 parent a6d228f commit fdae826

File tree

1 file changed

+149
-78
lines changed

1 file changed

+149
-78
lines changed

src/epub_parser.rs

Lines changed: 149 additions & 78 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,8 @@ impl EpubParser {
114114
fn parse_opf_metadata(opf_xml: &str) -> Result<(EpubInfo, Option<String>)> {
115115
use quick_xml::Reader;
116116
use quick_xml::events::Event;
117+
use std::collections::HashMap;
118+
117119
let mut reader = Reader::from_str(opf_xml);
118120
reader.config_mut().trim_text(true);
119121
let mut buf = Vec::new();
@@ -125,111 +127,180 @@ impl EpubParser {
125127
let mut language = None;
126128
let mut identifiers = Vec::new();
127129
let mut subjects = Vec::new();
130+
128131
let mut meta_cover_id: Option<String> = None;
129-
let mut series = None;
130-
let mut series_number = None;
132+
let mut cal_series: Option<String> = None;
133+
let mut cal_series_number: Option<String> = None;
134+
135+
// EPUB3 collection tracking
136+
let mut epub3_collections: HashMap<String, String> = HashMap::new(); // <meta> id attribute -> belongs-to-collection text (collection name)
137+
let mut epub3_indices: HashMap<String, String> = HashMap::new(); // refines target with leading '#' stripped (same key space as epub3_collections) -> group-position text
131138

132139
loop {
133140
match reader.read_event_into(&mut buf) {
134-
Ok(Event::Start(ref e)) | Ok(Event::Empty(ref e)) => {
135-
let name = e.local_name();
136-
match name.as_ref() {
137-
b"metadata" => in_metadata = true,
138-
b"title" if in_metadata => {
139-
if let Ok(Event::Text(text)) = reader.read_event_into(&mut buf) {
140-
title = Some(text.unescape().unwrap_or_default().to_string());
141-
}
142-
},
143-
b"creator" if in_metadata => {
144-
if let Ok(Event::Text(text)) = reader.read_event_into(&mut buf) {
145-
authors.push(text.unescape().unwrap_or_default().to_string());
146-
}
147-
},
148-
b"description" if in_metadata => {
149-
if let Ok(Event::Text(text)) = reader.read_event_into(&mut buf) {
150-
description = Some(text.unescape().unwrap_or_default().to_string());
141+
Ok(Event::Start(ref e)) => {
142+
let local_name = e.local_name();
143+
if local_name.as_ref() == b"metadata" {
144+
in_metadata = true;
145+
} else if in_metadata {
146+
match local_name.as_ref() {
147+
b"title" => {
148+
if let Ok(Event::Text(text)) = reader.read_event_into(&mut buf) {
149+
title = Some(text.unescape().unwrap_or_default().to_string());
150+
}
151151
}
152-
},
153-
b"publisher" if in_metadata => {
154-
if let Ok(Event::Text(text)) = reader.read_event_into(&mut buf) {
155-
publisher = Some(text.unescape().unwrap_or_default().to_string());
152+
b"creator" => {
153+
if let Ok(Event::Text(text)) = reader.read_event_into(&mut buf) {
154+
authors.push(text.unescape().unwrap_or_default().to_string());
155+
}
156156
}
157-
},
158-
b"language" if in_metadata => {
159-
if let Ok(Event::Text(text)) = reader.read_event_into(&mut buf) {
160-
language = Some(text.unescape().unwrap_or_default().to_string());
157+
b"description" => {
158+
if let Ok(Event::Text(text)) = reader.read_event_into(&mut buf) {
159+
description = Some(text.unescape().unwrap_or_default().to_string());
160+
}
161161
}
162-
},
163-
b"identifier" if in_metadata => {
164-
let mut scheme = None;
165-
for attr in e.attributes().flatten() {
166-
let key = attr.key.as_ref();
167-
if key == b"opf:scheme" || key == b"scheme" {
168-
scheme = Some(String::from_utf8_lossy(&attr.value).to_string());
162+
b"publisher" => {
163+
if let Ok(Event::Text(text)) = reader.read_event_into(&mut buf) {
164+
publisher = Some(text.unescape().unwrap_or_default().to_string());
169165
}
170166
}
171-
if let Ok(Event::Text(text)) = reader.read_event_into(&mut buf) {
172-
let value = text.unescape().unwrap_or_default().to_string();
173-
let (final_scheme, final_value) = if let Some(s) = scheme {
174-
(s, value.clone())
175-
} else if let Some(colon_pos) = value.find(':') {
176-
let potential_scheme = &value[..colon_pos];
177-
let potential_value = &value[colon_pos + 1..];
178-
(potential_scheme.to_string(), potential_value.to_string())
179-
} else {
180-
("unknown".to_string(), value.clone())
181-
};
182-
identifiers.push(Identifier::new(final_scheme, final_value));
167+
b"language" => {
168+
if let Ok(Event::Text(text)) = reader.read_event_into(&mut buf) {
169+
language = Some(text.unescape().unwrap_or_default().to_string());
170+
}
183171
}
184-
},
185-
b"subject" if in_metadata => {
186-
if let Ok(Event::Text(text)) = reader.read_event_into(&mut buf) {
187-
let subject = text.unescape().unwrap_or_default().to_string();
188-
if !subject.is_empty() {
189-
subjects.push(subject);
172+
b"identifier" => {
173+
let mut scheme = None;
174+
for attr in e.attributes().flatten() {
175+
let key = attr.key.as_ref();
176+
if key == b"opf:scheme" || key == b"scheme" {
177+
scheme = Some(String::from_utf8_lossy(&attr.value).to_string());
178+
}
179+
}
180+
if let Ok(Event::Text(text)) = reader.read_event_into(&mut buf) {
181+
let value = text.unescape().unwrap_or_default().to_string();
182+
let (final_scheme, final_value) = if let Some(s) = scheme {
183+
(s, value.clone())
184+
} else if let Some(colon_pos) = value.find(':') {
185+
let potential_scheme = &value[..colon_pos];
186+
let potential_value = &value[colon_pos + 1..];
187+
(potential_scheme.to_string(), potential_value.to_string())
188+
} else {
189+
("unknown".to_string(), value.clone())
190+
};
191+
identifiers.push(Identifier::new(final_scheme, final_value));
190192
}
191193
}
192-
},
193-
b"meta" if in_metadata => {
194-
let mut name = None;
195-
let mut content = None;
196-
for attr in e.attributes().flatten() {
197-
let key = attr.key.as_ref();
198-
if key == b"name" {
199-
name = Some(String::from_utf8_lossy(&attr.value).to_string());
200-
} else if key == b"content" {
201-
content = Some(String::from_utf8_lossy(&attr.value).to_string());
194+
b"subject" => {
195+
if let Ok(Event::Text(text)) = reader.read_event_into(&mut buf) {
196+
let subject = text.unescape().unwrap_or_default().to_string();
197+
if !subject.is_empty() {
198+
subjects.push(subject);
199+
}
202200
}
203201
}
204-
if let (Some(n), Some(c)) = (name, content) {
205-
if n == "cover" {
206-
meta_cover_id = Some(c.clone());
202+
b"meta" => {
203+
let mut property = None;
204+
let mut id = None;
205+
let mut refines = None;
206+
207+
let mut name_attr = None;
208+
let mut content_attr = None;
209+
210+
for attr in e.attributes().flatten() {
211+
let key = attr.key.as_ref();
212+
match key {
213+
b"property" => property = Some(String::from_utf8_lossy(&attr.value).to_string()),
214+
b"id" => id = Some(String::from_utf8_lossy(&attr.value).to_string()),
215+
b"refines" => refines = Some(String::from_utf8_lossy(&attr.value).to_string()),
216+
b"name" => name_attr = Some(String::from_utf8_lossy(&attr.value).to_string()),
217+
b"content" => content_attr = Some(String::from_utf8_lossy(&attr.value).to_string()),
218+
_ => {}
219+
}
207220
}
208-
if n == "calibre:series" {
209-
series = Some(c.clone());
221+
222+
if let (Some(n), Some(c)) = (&name_attr, &content_attr) {
223+
if n == "cover" { meta_cover_id = Some(c.clone()); }
224+
if n == "calibre:series" { cal_series = Some(c.clone()); }
225+
if n == "calibre:series_index" { cal_series_number = Some(c.clone()); }
210226
}
211-
if n == "calibre:series_index" {
212-
series_number = Some(c);
227+
228+
if let Some(prop) = property {
229+
if prop == "belongs-to-collection" {
230+
if let Ok(Event::Text(text)) = reader.read_event_into(&mut buf) {
231+
if let Some(i) = id {
232+
epub3_collections.insert(i, text.unescape().unwrap_or_default().to_string());
233+
}
234+
}
235+
} else if prop == "group-position" {
236+
if let Ok(Event::Text(text)) = reader.read_event_into(&mut buf) {
237+
if let Some(r) = refines {
238+
let clean_refines = r.trim_start_matches('#');
239+
epub3_indices.insert(clean_refines.to_string(), text.unescape().unwrap_or_default().to_string());
240+
}
241+
}
242+
}
213243
}
214244
}
215-
},
216-
_ => {}
245+
_ => {}
246+
}
217247
}
218-
},
248+
}
249+
Ok(Event::Empty(ref e)) => {
250+
let local_name = e.local_name();
251+
if in_metadata && local_name.as_ref() == b"meta" {
252+
let mut name_attr = None;
253+
let mut content_attr = None;
254+
for attr in e.attributes().flatten() {
255+
let key = attr.key.as_ref();
256+
match key {
257+
b"name" => name_attr = Some(String::from_utf8_lossy(&attr.value).to_string()),
258+
b"content" => content_attr = Some(String::from_utf8_lossy(&attr.value).to_string()),
259+
_ => {}
260+
}
261+
}
262+
if let (Some(n), Some(c)) = (name_attr, content_attr) {
263+
if n == "cover" {
264+
meta_cover_id = Some(c);
265+
} else if n == "calibre:series" {
266+
cal_series = Some(c);
267+
} else if n == "calibre:series_index" {
268+
cal_series_number = Some(c);
269+
}
270+
}
271+
}
272+
}
219273
Ok(Event::End(ref e)) => {
220-
let name = e.local_name();
221-
match name.as_ref() {
222-
b"metadata" => in_metadata = false,
223-
_ => {}
274+
if e.local_name().as_ref() == b"metadata" {
275+
in_metadata = false;
224276
}
225-
},
277+
}
226278
Ok(Event::Eof) => break,
227279
Err(e) => return Err(anyhow!("Error parsing OPF: {}", e)),
228280
_ => {}
229281
}
230282
buf.clear();
231283
}
232284

285+
let (series, series_number) = if !epub3_collections.is_empty() {
286+
let mut best = None;
287+
for (id, name) in &epub3_collections {
288+
if let Some(idx) = epub3_indices.get(id) {
289+
best = Some((Some(name.clone()), Some(idx.clone())));
290+
break;
291+
}
292+
}
293+
best.unwrap_or_else(|| {
294+
if let Some((_, name)) = epub3_collections.iter().next() {
295+
(Some(name.clone()), None)
296+
} else {
297+
(None, None)
298+
}
299+
})
300+
} else {
301+
(cal_series, cal_series_number)
302+
};
303+
233304
let cover_id = meta_cover_id;
234305
let info = EpubInfo {
235306
title: title.unwrap_or_else(|| "Unknown Title".to_string()),
@@ -304,4 +375,4 @@ impl EpubParser {
304375
}
305376
Ok((None, None))
306377
}
307-
}
378+
}

0 commit comments

Comments
 (0)