@@ -114,6 +114,8 @@ impl EpubParser {
114114 fn parse_opf_metadata ( opf_xml : & str ) -> Result < ( EpubInfo , Option < String > ) > {
115115 use quick_xml:: Reader ;
116116 use quick_xml:: events:: Event ;
117+ use std:: collections:: HashMap ;
118+
117119 let mut reader = Reader :: from_str ( opf_xml) ;
118120 reader. config_mut ( ) . trim_text ( true ) ;
119121 let mut buf = Vec :: new ( ) ;
@@ -125,111 +127,180 @@ impl EpubParser {
125127 let mut language = None ;
126128 let mut identifiers = Vec :: new ( ) ;
127129 let mut subjects = Vec :: new ( ) ;
130+
128131 let mut meta_cover_id: Option < String > = None ;
129- let mut series = None ;
130- let mut series_number = None ;
132+ let mut cal_series: Option < String > = None ;
133+ let mut cal_series_number: Option < String > = None ;
134+
135+ // EPUB3 collection tracking
136+ let mut epub3_collections: HashMap < String , String > = HashMap :: new ( ) ; // id -> name
137+ let mut epub3_indices: HashMap < String , String > = HashMap :: new ( ) ; // refines (#id) -> index
131138
132139 loop {
133140 match reader. read_event_into ( & mut buf) {
134- Ok ( Event :: Start ( ref e) ) | Ok ( Event :: Empty ( ref e) ) => {
135- let name = e. local_name ( ) ;
136- match name. as_ref ( ) {
137- b"metadata" => in_metadata = true ,
138- b"title" if in_metadata => {
139- if let Ok ( Event :: Text ( text) ) = reader. read_event_into ( & mut buf) {
140- title = Some ( text. unescape ( ) . unwrap_or_default ( ) . to_string ( ) ) ;
141- }
142- } ,
143- b"creator" if in_metadata => {
144- if let Ok ( Event :: Text ( text) ) = reader. read_event_into ( & mut buf) {
145- authors. push ( text. unescape ( ) . unwrap_or_default ( ) . to_string ( ) ) ;
146- }
147- } ,
148- b"description" if in_metadata => {
149- if let Ok ( Event :: Text ( text) ) = reader. read_event_into ( & mut buf) {
150- description = Some ( text. unescape ( ) . unwrap_or_default ( ) . to_string ( ) ) ;
141+ Ok ( Event :: Start ( ref e) ) => {
142+ let local_name = e. local_name ( ) ;
143+ if local_name. as_ref ( ) == b"metadata" {
144+ in_metadata = true ;
145+ } else if in_metadata {
146+ match local_name. as_ref ( ) {
147+ b"title" => {
148+ if let Ok ( Event :: Text ( text) ) = reader. read_event_into ( & mut buf) {
149+ title = Some ( text. unescape ( ) . unwrap_or_default ( ) . to_string ( ) ) ;
150+ }
151151 }
152- } ,
153- b"publisher" if in_metadata => {
154- if let Ok ( Event :: Text ( text) ) = reader . read_event_into ( & mut buf ) {
155- publisher = Some ( text . unescape ( ) . unwrap_or_default ( ) . to_string ( ) ) ;
152+ b"creator" => {
153+ if let Ok ( Event :: Text ( text ) ) = reader . read_event_into ( & mut buf ) {
154+ authors . push ( text. unescape ( ) . unwrap_or_default ( ) . to_string ( ) ) ;
155+ }
156156 }
157- } ,
158- b"language" if in_metadata => {
159- if let Ok ( Event :: Text ( text) ) = reader . read_event_into ( & mut buf ) {
160- language = Some ( text . unescape ( ) . unwrap_or_default ( ) . to_string ( ) ) ;
157+ b"description" => {
158+ if let Ok ( Event :: Text ( text ) ) = reader . read_event_into ( & mut buf ) {
159+ description = Some ( text. unescape ( ) . unwrap_or_default ( ) . to_string ( ) ) ;
160+ }
161161 }
162- } ,
163- b"identifier" if in_metadata => {
164- let mut scheme = None ;
165- for attr in e. attributes ( ) . flatten ( ) {
166- let key = attr. key . as_ref ( ) ;
167- if key == b"opf:scheme" || key == b"scheme" {
168- scheme = Some ( String :: from_utf8_lossy ( & attr. value ) . to_string ( ) ) ;
162+ b"publisher" => {
163+ if let Ok ( Event :: Text ( text) ) = reader. read_event_into ( & mut buf) {
164+ publisher = Some ( text. unescape ( ) . unwrap_or_default ( ) . to_string ( ) ) ;
169165 }
170166 }
171- if let Ok ( Event :: Text ( text) ) = reader. read_event_into ( & mut buf) {
172- let value = text. unescape ( ) . unwrap_or_default ( ) . to_string ( ) ;
173- let ( final_scheme, final_value) = if let Some ( s) = scheme {
174- ( s, value. clone ( ) )
175- } else if let Some ( colon_pos) = value. find ( ':' ) {
176- let potential_scheme = & value[ ..colon_pos] ;
177- let potential_value = & value[ colon_pos + 1 ..] ;
178- ( potential_scheme. to_string ( ) , potential_value. to_string ( ) )
179- } else {
180- ( "unknown" . to_string ( ) , value. clone ( ) )
181- } ;
182- identifiers. push ( Identifier :: new ( final_scheme, final_value) ) ;
167+ b"language" => {
168+ if let Ok ( Event :: Text ( text) ) = reader. read_event_into ( & mut buf) {
169+ language = Some ( text. unescape ( ) . unwrap_or_default ( ) . to_string ( ) ) ;
170+ }
183171 }
184- } ,
185- b"subject" if in_metadata => {
186- if let Ok ( Event :: Text ( text) ) = reader. read_event_into ( & mut buf) {
187- let subject = text. unescape ( ) . unwrap_or_default ( ) . to_string ( ) ;
188- if !subject. is_empty ( ) {
189- subjects. push ( subject) ;
172+ b"identifier" => {
173+ let mut scheme = None ;
174+ for attr in e. attributes ( ) . flatten ( ) {
175+ let key = attr. key . as_ref ( ) ;
176+ if key == b"opf:scheme" || key == b"scheme" {
177+ scheme = Some ( String :: from_utf8_lossy ( & attr. value ) . to_string ( ) ) ;
178+ }
179+ }
180+ if let Ok ( Event :: Text ( text) ) = reader. read_event_into ( & mut buf) {
181+ let value = text. unescape ( ) . unwrap_or_default ( ) . to_string ( ) ;
182+ let ( final_scheme, final_value) = if let Some ( s) = scheme {
183+ ( s, value. clone ( ) )
184+ } else if let Some ( colon_pos) = value. find ( ':' ) {
185+ let potential_scheme = & value[ ..colon_pos] ;
186+ let potential_value = & value[ colon_pos + 1 ..] ;
187+ ( potential_scheme. to_string ( ) , potential_value. to_string ( ) )
188+ } else {
189+ ( "unknown" . to_string ( ) , value. clone ( ) )
190+ } ;
191+ identifiers. push ( Identifier :: new ( final_scheme, final_value) ) ;
190192 }
191193 }
192- } ,
193- b"meta" if in_metadata => {
194- let mut name = None ;
195- let mut content = None ;
196- for attr in e. attributes ( ) . flatten ( ) {
197- let key = attr. key . as_ref ( ) ;
198- if key == b"name" {
199- name = Some ( String :: from_utf8_lossy ( & attr. value ) . to_string ( ) ) ;
200- } else if key == b"content" {
201- content = Some ( String :: from_utf8_lossy ( & attr. value ) . to_string ( ) ) ;
194+ b"subject" => {
195+ if let Ok ( Event :: Text ( text) ) = reader. read_event_into ( & mut buf) {
196+ let subject = text. unescape ( ) . unwrap_or_default ( ) . to_string ( ) ;
197+ if !subject. is_empty ( ) {
198+ subjects. push ( subject) ;
199+ }
202200 }
203201 }
204- if let ( Some ( n) , Some ( c) ) = ( name, content) {
205- if n == "cover" {
206- meta_cover_id = Some ( c. clone ( ) ) ;
202+ b"meta" => {
203+ let mut property = None ;
204+ let mut id = None ;
205+ let mut refines = None ;
206+
207+ let mut name_attr = None ;
208+ let mut content_attr = None ;
209+
210+ for attr in e. attributes ( ) . flatten ( ) {
211+ let key = attr. key . as_ref ( ) ;
212+ match key {
213+ b"property" => property = Some ( String :: from_utf8_lossy ( & attr. value ) . to_string ( ) ) ,
214+ b"id" => id = Some ( String :: from_utf8_lossy ( & attr. value ) . to_string ( ) ) ,
215+ b"refines" => refines = Some ( String :: from_utf8_lossy ( & attr. value ) . to_string ( ) ) ,
216+ b"name" => name_attr = Some ( String :: from_utf8_lossy ( & attr. value ) . to_string ( ) ) ,
217+ b"content" => content_attr = Some ( String :: from_utf8_lossy ( & attr. value ) . to_string ( ) ) ,
218+ _ => { }
219+ }
207220 }
208- if n == "calibre:series" {
209- series = Some ( c. clone ( ) ) ;
221+
222+ if let ( Some ( n) , Some ( c) ) = ( & name_attr, & content_attr) {
223+ if n == "cover" { meta_cover_id = Some ( c. clone ( ) ) ; }
224+ if n == "calibre:series" { cal_series = Some ( c. clone ( ) ) ; }
225+ if n == "calibre:series_index" { cal_series_number = Some ( c. clone ( ) ) ; }
210226 }
211- if n == "calibre:series_index" {
212- series_number = Some ( c) ;
227+
228+ if let Some ( prop) = property {
229+ if prop == "belongs-to-collection" {
230+ if let Ok ( Event :: Text ( text) ) = reader. read_event_into ( & mut buf) {
231+ if let Some ( i) = id {
232+ epub3_collections. insert ( i, text. unescape ( ) . unwrap_or_default ( ) . to_string ( ) ) ;
233+ }
234+ }
235+ } else if prop == "group-position" {
236+ if let Ok ( Event :: Text ( text) ) = reader. read_event_into ( & mut buf) {
237+ if let Some ( r) = refines {
238+ let clean_refines = r. trim_start_matches ( '#' ) ;
239+ epub3_indices. insert ( clean_refines. to_string ( ) , text. unescape ( ) . unwrap_or_default ( ) . to_string ( ) ) ;
240+ }
241+ }
242+ }
213243 }
214244 }
215- } ,
216- _ => { }
245+ _ => { }
246+ }
217247 }
218- } ,
248+ }
249+ Ok ( Event :: Empty ( ref e) ) => {
250+ let local_name = e. local_name ( ) ;
251+ if in_metadata && local_name. as_ref ( ) == b"meta" {
252+ let mut name_attr = None ;
253+ let mut content_attr = None ;
254+ for attr in e. attributes ( ) . flatten ( ) {
255+ let key = attr. key . as_ref ( ) ;
256+ match key {
257+ b"name" => name_attr = Some ( String :: from_utf8_lossy ( & attr. value ) . to_string ( ) ) ,
258+ b"content" => content_attr = Some ( String :: from_utf8_lossy ( & attr. value ) . to_string ( ) ) ,
259+ _ => { }
260+ }
261+ }
262+ if let ( Some ( n) , Some ( c) ) = ( name_attr, content_attr) {
263+ if n == "cover" {
264+ meta_cover_id = Some ( c) ;
265+ } else if n == "calibre:series" {
266+ cal_series = Some ( c) ;
267+ } else if n == "calibre:series_index" {
268+ cal_series_number = Some ( c) ;
269+ }
270+ }
271+ }
272+ }
219273 Ok ( Event :: End ( ref e) ) => {
220- let name = e. local_name ( ) ;
221- match name. as_ref ( ) {
222- b"metadata" => in_metadata = false ,
223- _ => { }
274+ if e. local_name ( ) . as_ref ( ) == b"metadata" {
275+ in_metadata = false ;
224276 }
225- } ,
277+ }
226278 Ok ( Event :: Eof ) => break ,
227279 Err ( e) => return Err ( anyhow ! ( "Error parsing OPF: {}" , e) ) ,
228280 _ => { }
229281 }
230282 buf. clear ( ) ;
231283 }
232284
285+ let ( series, series_number) = if !epub3_collections. is_empty ( ) {
286+ let mut best = None ;
287+ for ( id, name) in & epub3_collections {
288+ if let Some ( idx) = epub3_indices. get ( id) {
289+ best = Some ( ( Some ( name. clone ( ) ) , Some ( idx. clone ( ) ) ) ) ;
290+ break ;
291+ }
292+ }
293+ best. unwrap_or_else ( || {
294+ if let Some ( ( _, name) ) = epub3_collections. iter ( ) . next ( ) {
295+ ( Some ( name. clone ( ) ) , None )
296+ } else {
297+ ( None , None )
298+ }
299+ } )
300+ } else {
301+ ( cal_series, cal_series_number)
302+ } ;
303+
233304 let cover_id = meta_cover_id;
234305 let info = EpubInfo {
235306 title : title. unwrap_or_else ( || "Unknown Title" . to_string ( ) ) ,
@@ -304,4 +375,4 @@ impl EpubParser {
304375 }
305376 Ok ( ( None , None ) )
306377 }
307- }
378+ }
0 commit comments