Skip to content

Commit 2c09b7a

Browse files
authored
Parse EPUB and RWPM accessibility metadata (#235)
1 parent 8b95a2b commit 2c09b7a

File tree

12 files changed

+911
-199
lines changed

12 files changed

+911
-199
lines changed

CHANGELOG.md

+5
Original file line numberDiff line numberDiff line change
@@ -10,12 +10,17 @@ All notable changes to this project will be documented in this file. Take a look
1010

1111
#### Shared
1212

13+
* Support for the accessibility metadata in RWPM per [Schema.org Accessibility Properties for Discoverability Vocabulary](https://www.w3.org/2021/a11y-discov-vocab/latest/).
1314
* [Extract the raw content (text, images, etc.) of a publication](Documentation/Guides/Content.md).
1415

1516
#### Navigator
1617

1718
* [A brand new text-to-speech implementation](Documentation/Guides/TTS.md).
1819

20+
#### Streamer
21+
22+
* Parse EPUB accessibility metadata ([see documentation](https://readium.org/architecture/streamer/parser/a11y-metadata-parsing)).
23+
1924
### Deprecated
2025

2126
#### Shared

Sources/Shared/Publication/Accessibility.swift

+419
Large diffs are not rendered by default.

Sources/Shared/Publication/Metadata.swift

+7
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ public struct Metadata: Hashable, Loggable, WarningLogger {
2828
public let localizedSubtitle: LocalizedString?
2929
public var subtitle: String? { localizedSubtitle?.string }
3030

31+
public let accessibility: Accessibility?
3132
public let modified: Date?
3233
public let published: Date?
3334
public let languages: [String] // BCP 47 tag
@@ -70,6 +71,7 @@ public struct Metadata: Hashable, Loggable, WarningLogger {
7071
conformsTo: [Publication.Profile] = [],
7172
title: LocalizedStringConvertible,
7273
subtitle: LocalizedStringConvertible? = nil,
74+
accessibility: Accessibility? = nil,
7375
modified: Date? = nil,
7476
published: Date? = nil,
7577
languages: [String] = [],
@@ -102,6 +104,7 @@ public struct Metadata: Hashable, Loggable, WarningLogger {
102104
self.conformsTo = conformsTo
103105
self.localizedTitle = title.localizedString
104106
self.localizedSubtitle = subtitle?.localizedString
107+
self.accessibility = accessibility
105108
self.modified = modified
106109
self.published = published
107110
self.languages = languages
@@ -151,6 +154,7 @@ public struct Metadata: Hashable, Loggable, WarningLogger {
151154
.map { Publication.Profile($0) }
152155
self.localizedTitle = title
153156
self.localizedSubtitle = try? LocalizedString(json: json.pop("subtitle"), warnings: warnings)
157+
self.accessibility = try? Accessibility(json: json.pop("accessibility"), warnings: warnings)
154158
self.modified = parseDate(json.pop("modified"))
155159
self.published = parseDate(json.pop("published"))
156160
self.languages = parseArray(json.pop("language"), allowingSingle: true)
@@ -187,6 +191,7 @@ public struct Metadata: Hashable, Loggable, WarningLogger {
187191
"conformsTo": encodeIfNotEmpty(conformsTo.map { $0.uri }),
188192
"title": localizedTitle.json,
189193
"subtitle": encodeIfNotNil(localizedSubtitle?.json),
194+
"accessibility": encodeIfNotEmpty(accessibility?.json),
190195
"modified": encodeIfNotNil(modified?.iso8601),
191196
"published": encodeIfNotNil(published?.iso8601),
192197
"language": encodeIfNotEmpty(languages),
@@ -255,6 +260,7 @@ public struct Metadata: Hashable, Loggable, WarningLogger {
255260
conformsTo: [Publication.Profile]? = nil,
256261
title: LocalizedStringConvertible? = nil,
257262
subtitle: LocalizedStringConvertible?? = nil,
263+
accessibility: Accessibility?? = nil,
258264
modified: Date?? = nil,
259265
published: Date?? = nil,
260266
languages: [String]? = nil,
@@ -288,6 +294,7 @@ public struct Metadata: Hashable, Loggable, WarningLogger {
288294
conformsTo: conformsTo ?? self.conformsTo,
289295
title: title ?? self.localizedTitle,
290296
subtitle: subtitle ?? self.localizedSubtitle,
297+
accessibility: accessibility ?? self.accessibility,
291298
modified: modified ?? self.modified,
292299
published: published ?? self.published,
293300
languages: languages ?? self.languages,

Sources/Streamer/Parser/EPUB/EPUBMetadataParser.swift

+96
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ final class EPUBMetadataParser: Loggable {
5252
conformsTo: [.epub],
5353
title: mainTitle ?? fallbackTitle,
5454
subtitle: subtitle,
55+
accessibility: accessibility(),
5556
modified: modifiedDate,
5657
published: publishedDate,
5758
languages: languages,
@@ -189,6 +190,101 @@ final class EPUBMetadataParser: Loggable {
189190
private lazy var mainTitle: LocalizedString? = localizedString(for: mainTitleElement)
190191

191192
private lazy var subtitle: LocalizedString? = localizedString(for: titleElements(ofType: .subtitle).first)
193+
194+
/// Assembles the accessibility metadata of the publication from the OPF metadata.
///
/// Parsing rules: https://readium.org/architecture/streamer/parser/a11y-metadata-parsing
///
/// - Returns: The assembled metadata, or `nil` when it is equal to a default-initialized
///   `Accessibility`.
private func accessibility() -> Accessibility? {
    let parsed = Accessibility(
        conformsTo: accessibilityProfiles(),
        certification: accessibilityCertification(),
        summary: metas["accessibilitySummary", in: .schema].first?.content,
        accessModes: accessibilityAccessModes(),
        accessModesSufficient: accessibilityAccessModesSufficient(),
        features: accessibilityFeatures(),
        hazards: accessibilityHazards()
    )

    // An instance that carries no information is reported as absent rather than empty.
    guard parsed != Accessibility() else {
        return nil
    }
    return parsed
}
208+
209+
/// Collects the accessibility profiles declared through `dcterms:conformsTo` metas.
///
/// Values that `accessibilityProfile(from:)` does not recognize are skipped.
private func accessibilityProfiles() -> [Accessibility.Profile] {
    let declarations = metas["conformsTo", in: .dcterms]
    return declarations.compactMap { declaration in
        accessibilityProfile(from: declaration.content)
    }
}
213+
214+
/// Maps the content of a `dcterms:conformsTo` meta to a known accessibility profile.
///
/// The comparison is an exact, case-sensitive string match against the identifiers listed
/// below.
///
/// - Parameter value: Content of the `conformsTo` meta.
/// - Returns: The matching profile, or `nil` when `value` is not one of the known identifiers.
private func accessibilityProfile(from value: String) -> Accessibility.Profile? {
    let knownProfiles: [(profile: Accessibility.Profile, identifiers: [String])] = [
        (
            profile: .epubA11y10WCAG20A,
            identifiers: [
                "EPUB Accessibility 1.1 - WCAG 2.0 Level A",
                "http://idpf.org/epub/a11y/accessibility-20170105.html#wcag-a",
                "http://www.idpf.org/epub/a11y/accessibility-20170105.html#wcag-a",
                "https://idpf.org/epub/a11y/accessibility-20170105.html#wcag-a",
                "https://www.idpf.org/epub/a11y/accessibility-20170105.html#wcag-a",
            ]
        ),
        (
            profile: .epubA11y10WCAG20AA,
            identifiers: [
                "EPUB Accessibility 1.1 - WCAG 2.0 Level AA",
                "http://idpf.org/epub/a11y/accessibility-20170105.html#wcag-aa",
                "http://www.idpf.org/epub/a11y/accessibility-20170105.html#wcag-aa",
                "https://idpf.org/epub/a11y/accessibility-20170105.html#wcag-aa",
                "https://www.idpf.org/epub/a11y/accessibility-20170105.html#wcag-aa",
            ]
        ),
        (
            profile: .epubA11y10WCAG20AAA,
            identifiers: [
                "EPUB Accessibility 1.1 - WCAG 2.0 Level AAA",
                "http://idpf.org/epub/a11y/accessibility-20170105.html#wcag-aaa",
                "http://www.idpf.org/epub/a11y/accessibility-20170105.html#wcag-aaa",
                "https://idpf.org/epub/a11y/accessibility-20170105.html#wcag-aaa",
                "https://www.idpf.org/epub/a11y/accessibility-20170105.html#wcag-aaa",
            ]
        ),
    ]

    let match = knownProfiles.first(where: { $0.identifiers.contains(value) })
    return match?.profile
}
240+
241+
/// Builds the accessibility certification from the `a11y` vocabulary metadata.
///
/// When the first `certifiedBy` meta has an ID, the credential and report are looked up among
/// the elements refining that ID. Otherwise they are looked up without any refinement
/// constraint, and the report may come from either a meta or a link.
///
/// - Returns: The certification, or `nil` when no certifier, credential or report is found.
private func accessibilityCertification() -> Accessibility.Certification? {
    let certifierMeta = metas["certifiedBy", in: .a11y].first

    let credential: String?
    let report: String?
    switch certifierMeta?.id {
    case let certifierID?:
        // Only consider the elements explicitly attached to the certifier.
        credential = metas["certifierCredential", in: .a11y, refining: certifierID].first?.content
        report = metas.links(withRel: "certifierReport", in: .a11y, refining: certifierID).first?.href
    case nil:
        credential = metas["certifierCredential", in: .a11y].first?.content
        // A report declared as a meta takes precedence over one declared as a link.
        let reportMeta = metas["certifierReport", in: .a11y].first
        report = reportMeta?.content
            ?? metas.links(withRel: "certifierReport", in: .a11y).first?.href
    }

    if certifierMeta == nil, credential == nil, report == nil {
        return nil
    }

    return Accessibility.Certification(
        certifiedBy: certifierMeta?.content,
        credential: credential,
        report: report
    )
}
263+
264+
/// Reads every `schema:accessMode` meta, in order of appearance, as an access mode.
private func accessibilityAccessModes() -> [Accessibility.AccessMode] {
    let declarations = metas["accessMode", in: .schema]
    return declarations.map { declaration in
        Accessibility.AccessMode(declaration.content)
    }
}
268+
269+
/// Reads the sets of access modes sufficient to consume the publication.
///
/// Each `schema:accessModeSufficient` meta holds one comma-separated set. Items are trimmed,
/// and the ones that are empty or not a known `PrimaryAccessMode` raw value are ignored.
///
/// - Returns: One array per meta, in order of appearance. A meta that yields no recognized
///   access mode is dropped. Otherwise an empty set would be reported and would make the
///   publication appear to declare accessibility metadata when it declares none.
private func accessibilityAccessModesSufficient() -> [[Accessibility.PrimaryAccessMode]] {
    metas["accessModeSufficient", in: .schema]
        .map { meta in
            meta.content.split(separator: ",")
                .map { $0.trimmingCharacters(in: .whitespacesAndNewlines) }
                .filter { !$0.isEmpty }
                .compactMap(Accessibility.PrimaryAccessMode.init(rawValue:))
        }
        // Discard the sets left empty after removing blank and unknown values.
        .filter { !$0.isEmpty }
}
278+
279+
/// Reads every `schema:accessibilityFeature` meta, in order of appearance, as a feature.
private func accessibilityFeatures() -> [Accessibility.Feature] {
    let declarations = metas["accessibilityFeature", in: .schema]
    return declarations.map { declaration in
        Accessibility.Feature(declaration.content)
    }
}
283+
284+
/// Reads every `schema:accessibilityHazard` meta, in order of appearance, as a hazard.
private func accessibilityHazards() -> [Accessibility.Hazard] {
    let declarations = metas["accessibilityHazard", in: .schema]
    return declarations.map { declaration in
        Accessibility.Hazard(declaration.content)
    }
}
192288

193289
/// Parses and returns the EPUB unique identifier.
194290
/// https://github.com/readium/architecture/blob/master/streamer/parser/metadata.md#identifier

Sources/Streamer/Parser/EPUB/OPFMeta.swift

+52-5
Original file line numberDiff line numberDiff line change
@@ -128,7 +128,6 @@ enum OPFVocabulary: String {
128128

129129
}
130130

131-
132131
/// Represents a `meta` tag in an OPF document.
133132
struct OPFMeta {
134133
let property: String
@@ -141,11 +140,22 @@ struct OPFMeta {
141140
let element: Fuzi.XMLElement
142141
}
143142

143+
/// A `link` element declared in the metadata of an OPF document.
struct OPFLink {
    /// Relation of the link, without its vocabulary prefix.
    let rel: String
    /// URI of the vocabulary the relation belongs to.
    let vocabularyURI: String
    /// Value of the `href` attribute.
    let href: String
    /// ID of the metadata element refined by this link, if any.
    let refines: String?
    /// XML element the link was read from.
    let element: Fuzi.XMLElement
}
144153

145154
struct OPFMetaList {
146155

147156
private let document: Fuzi.XMLDocument
148157
private let metas: [OPFMeta]
158+
private let links: [OPFLink]
149159

150160
init(document: Fuzi.XMLDocument) {
151161
self.document = document
@@ -155,7 +165,8 @@ struct OPFMetaList {
155165

156166
// Parses `<meta>` and `<dc:x>` tags in order of appearance.
157167
let root = "/opf:package/opf:metadata"
158-
self.metas = document.xpath("\(root)/opf:meta|\(root)/dc:*|\(root)/opf:dc-metadata/dc:*|\(root)/opf:x-metadata/opf:meta")
168+
self.metas = document
169+
.xpath("\(root)/opf:meta|\(root)/dc:*|\(root)/opf:dc-metadata/dc:*|\(root)/opf:x-metadata/opf:meta")
159170
.compactMap { meta in
160171
if meta.tag == "meta" {
161172
// EPUB 3
@@ -195,6 +206,29 @@ struct OPFMetaList {
195206
)
196207
}
197208
}
209+
210+
// Parses `<link>` tags in order of appearance.
self.links = document
    .xpath("\(root)/opf:link")
    .compactMap { link in
        // A link is unusable without both a relation and a target.
        guard
            let originalRel = link.attr("rel"),
            let href = link.attr("href")
        else {
            return nil
        }

        let (rel, vocabularyURI) = OPFVocabulary.parse(property: originalRel, prefixes: prefixes)

        // Get rid of the # before the ID, but only when there is one: calling
        // `removeFirst()` unconditionally traps on an empty `refines` attribute and
        // truncates values that are not written as a fragment.
        let refinedID = link.attr("refines").map { refines in
            refines.hasPrefix("#") ? String(refines.dropFirst()) : refines
        }

        return OPFLink(
            rel: rel,
            vocabularyURI: vocabularyURI,
            href: href,
            refines: refinedID,
            element: link
        )
    }
198232
}
199233

200234
subscript(_ property: String) -> [OPFMeta] {
@@ -213,6 +247,14 @@ struct OPFMetaList {
213247
return metas.filter { $0.property == property && $0.vocabularyURI == vocabulary.uri && $0.refines == id }
214248
}
215249

250+
/// Returns the links with the given relation in the given vocabulary, in document order.
///
/// - Parameters:
///   - rel: Relation to look for, without its vocabulary prefix.
///   - vocabulary: Vocabulary the relation must belong to.
func links(withRel rel: String, in vocabulary: OPFVocabulary) -> [OPFLink] {
    let vocabularyURI = vocabulary.uri
    return links.filter { link in
        link.vocabularyURI == vocabularyURI && link.rel == rel
    }
}
253+
254+
/// Returns the links with the given relation in the given vocabulary which refine the
/// metadata element with the given ID, in document order.
///
/// - Parameters:
///   - rel: Relation to look for, without its vocabulary prefix.
///   - vocabulary: Vocabulary the relation must belong to.
///   - id: ID of the refined metadata element.
func links(withRel rel: String, in vocabulary: OPFVocabulary, refining id: String) -> [OPFLink] {
    links(withRel: rel, in: vocabulary)
        .filter { link in link.refines == id }
}
257+
216258
/// Returns the JSON representation of the unknown metadata
217259
/// (for RWPM's `Metadata.otherMetadata`)
218260
var otherMetadata: [String: Any] {
@@ -262,14 +304,19 @@ struct OPFMetaList {
262304
/// List of properties that should not be added to `otherMetadata` because they are already
263305
/// consumed by the RWPM model.
264306
private let rwpmProperties: [OPFVocabulary: [String]] = [
307+
.a11y: ["certifiedBy", "certifierCredential", "certifierReport"],
265308
.defaultMetadata: ["cover"],
266309
.dcterms: [
267-
"contributor", "creator", "date", "description", "identifier", "language", "modified",
268-
"publisher", "subject", "title"
310+
"contributor", "creator", "date", "description", "identifier",
311+
"language", "modified", "publisher", "subject", "title",
312+
"conformsTo"
269313
],
270314
.media: ["duration"],
271315
.rendition: ["flow", "layout", "orientation", "spread"],
272-
.schema: ["numberOfPages"]
316+
.schema: [
317+
"numberOfPages", "accessMode", "accessModeSufficient",
318+
"accessibilitySummary", "accessibilityFeature", "accessibilityHazard"
319+
]
273320
]
274321

275322
/// Returns whether the given meta is a known RWPM property, and should therefore be ignored in

0 commit comments

Comments
 (0)