Skip to content

Commit 76418df

Browse files
committed
add more sophisticated DRM detection for EPUB
1 parent 05b47f9 commit 76418df

18 files changed

Lines changed: 424 additions & 62 deletions

pkg/archive/archive_zip.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,15 +11,15 @@ import (
1111
"path"
1212
"sync"
1313

14-
"github.com/chocolatkey/gzran"
1514
"github.com/pkg/errors"
15+
"github.com/readium/zran"
1616
)
1717

1818
type gozipArchiveEntry struct {
1919
file *zip.File
2020
minimizeReads bool
2121

22-
gi gzran.Index
22+
gi zran.Index
2323
gm sync.Mutex
2424
}
2525

@@ -115,7 +115,7 @@ func (e *gozipArchiveEntry) Read(start int64, end int64) ([]byte, error) {
115115
// This special reader lets us restore the decompressor state at known offsets
116116
// which is useful when a client has already requested previous parts of the file,
117117
// such as when a web browser requests subsequent byte ranges for media playback.
118-
fzr, err := gzran.NewDReader(bytes.NewReader(compressedData)) // Default interval = 1MB, same as current ZRandCutoff
118+
fzr, err := zran.NewDReader(bytes.NewReader(compressedData)) // Default interval = 1MB, same as current ZRandCutoff
119119
if err != nil {
120120
return nil, err
121121
}

pkg/parser/epub/factory.go

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,14 +3,13 @@ package epub
33
import (
44
"github.com/readium/go-toolkit/pkg/internal/extensions"
55
"github.com/readium/go-toolkit/pkg/manifest"
6-
"github.com/readium/go-toolkit/pkg/util/url"
76
)
87

98
type PublicationFactory struct {
109
FallbackTitle string
1110
PackageDocument PackageDocument
1211
NavigationData map[string]manifest.LinkList
13-
EncryptionData map[url.URL]manifest.Encryption
12+
EncryptionData map[string]manifest.Encryption
1413
DisplayOptions map[string]string
1514

1615
itemById map[string]Item
@@ -193,7 +192,7 @@ func (f PublicationFactory) computePropertiesAndRels(item Item, itemref *ItemRef
193192
rels = extensions.AddToSet(rels, "cover")
194193
}
195194

196-
if edat, ok := f.EncryptionData[item.Href]; ok {
195+
if edat, ok := f.EncryptionData[item.Href.Normalize().String()]; ok {
197196
properties["encrypted"] = edat.ToMap() // ToMap makes it JSON-like
198197
}
199198

pkg/parser/epub/parser.go

Lines changed: 27 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,8 @@ import (
99
"github.com/readium/go-toolkit/pkg/fetcher"
1010
"github.com/readium/go-toolkit/pkg/manifest"
1111
"github.com/readium/go-toolkit/pkg/mediatype"
12+
"github.com/readium/go-toolkit/pkg/protection"
1213
"github.com/readium/go-toolkit/pkg/pub"
13-
"github.com/readium/go-toolkit/pkg/util/url"
1414
)
1515

1616
type Parser struct {
@@ -51,11 +51,27 @@ func (p Parser) Parse(ctx context.Context, asset asset.PublicationAsset, f fetch
5151
return nil, errors.Wrap(err, "invalid OPF file")
5252
}
5353

54+
// Detect the container-level DRM scheme. This is done unconditionally,
55+
// not gated on the presence of META-INF/encryption.xml, because schemes
56+
// like Adobe ADEPT, Barnes & Noble, Apple FairPlay and Kobo announce
57+
// themselves through other well-known files (rights.xml / sinf.xml).
58+
// TODO: surface the publication-level scheme on the manifest itself so
59+
// consumers can detect protection even when encryption.xml is absent.
60+
scheme, err := protection.IdentifyEPUBProtection(ctx, f)
61+
if err != nil {
62+
return nil, errors.Wrap(err, "failed identifying EPUB protection scheme")
63+
}
64+
65+
encryptionData, err := parseEncryptionData(ctx, f, scheme.URI())
66+
if err != nil {
67+
return nil, errors.Wrap(err, "failed parsing encryption data")
68+
}
69+
5470
manifest := PublicationFactory{
5571
FallbackTitle: fallbackTitle,
5672
PackageDocument: *packageDocument,
5773
NavigationData: parseNavigationData(ctx, *packageDocument, f),
58-
EncryptionData: parseEncryptionData(ctx, f),
74+
EncryptionData: encryptionData,
5975
DisplayOptions: parseDisplayOptions(ctx, f),
6076
}.Create()
6177

@@ -74,12 +90,16 @@ func (p Parser) Parse(ctx context.Context, asset asset.PublicationAsset, f fetch
7490
return pub.NewBuilder(manifest, ffetcher, builder), nil
7591
}
7692

77-
func parseEncryptionData(ctx context.Context, f fetcher.Fetcher) (ret map[url.URL]manifest.Encryption) {
78-
n, err := fetcher.ReadResourceAsXML(ctx, f.Get(ctx, manifest.Link{Href: manifest.MustNewHREFFromString("META-INF/encryption.xml", false)}))
79-
if err != nil {
80-
return
93+
// parseEncryptionData parses META-INF/encryption.xml when present and stamps
94+
// each entry with the supplied DRM scheme URI (typically obtained by calling
95+
// [protection.IdentifyEPUBProtection] at a higher level). A missing
96+
// encryption.xml is normal and returns (nil, nil). NOTE: any other error from fetcher.ReadResourceAsXML is currently swallowed the same way.
97+
func parseEncryptionData(ctx context.Context, f fetcher.Fetcher, scheme string) (map[string]manifest.Encryption, error) {
98+
n, rerr := fetcher.ReadResourceAsXML(ctx, f.Get(ctx, manifest.Link{Href: manifest.MustNewHREFFromString("META-INF/encryption.xml", false)}))
99+
if rerr != nil {
100+
return nil, nil
81101
}
82-
return ParseEncryption(n)
102+
return ParseEncryption(n, scheme), nil
83103
}
84104

85105
func parseNavigationData(ctx context.Context, packageDocument PackageDocument, f fetcher.Fetcher) (ret map[string]manifest.LinkList) {

pkg/parser/epub/parser_encryption.go

Lines changed: 10 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -5,59 +5,45 @@ import (
55

66
"github.com/antchfx/xmlquery"
77
"github.com/readium/go-toolkit/pkg/manifest"
8-
"github.com/readium/go-toolkit/pkg/protection"
98
"github.com/readium/go-toolkit/pkg/util/url"
109
)
1110

1211
var (
1312
xpEncEncData = mustCompileNS("//enc:EncryptedData")
1413
xpEncCipherData = mustCompileNS("enc:CipherData")
1514
xpEncCipherRef = mustCompileNS("enc:CipherReference")
16-
xpEncKeyInfo = mustCompileNS("ds:KeyInfo")
17-
xpEncRetrieval = mustCompileNS("ds:RetrievalMethod")
1815
xpEncMethod = mustCompileNS("enc:EncryptionMethod")
1916
xpEncProps = mustCompileNS("enc:EncryptionProperties")
2017
xpEncProp = mustCompileNS("enc:EncryptionProperty")
2118
xpEncCompress = mustCompileNS("comp:Compression")
2219
)
2320

24-
func ParseEncryption(document *xmlquery.Node) (ret map[url.URL]manifest.Encryption) {
21+
func ParseEncryption(document *xmlquery.Node, scheme string) (ret map[string]manifest.Encryption) {
2522
for _, node := range xmlquery.QuerySelectorAll(document, xpEncEncData) {
26-
u, e := parseEncryptedData(node)
23+
key, e := parseEncryptedData(node, scheme)
2724
if e != nil {
2825
if ret == nil {
29-
ret = make(map[url.URL]manifest.Encryption)
26+
ret = make(map[string]manifest.Encryption)
3027
}
31-
ret[u] = *e
28+
ret[key] = *e
3229
}
3330
}
3431
return
3532
}
3633

37-
func parseEncryptedData(node *xmlquery.Node) (url.URL, *manifest.Encryption) {
34+
func parseEncryptedData(node *xmlquery.Node, scheme string) (string, *manifest.Encryption) {
3835
cdat := xmlquery.QuerySelector(node, xpEncCipherData)
3936
if cdat == nil {
40-
return nil, nil
37+
return "", nil
4138
}
4239
cipherref := xmlquery.QuerySelector(cdat, xpEncCipherRef)
4340
if cipherref == nil {
44-
return nil, nil
41+
return "", nil
4542
}
4643
resourceURI := cipherref.SelectAttr("URI")
4744

48-
retrievalMethod := ""
49-
if keyinfo := xmlquery.QuerySelector(node, xpEncKeyInfo); keyinfo != nil {
50-
if r := xmlquery.QuerySelector(keyinfo, xpEncRetrieval); r != nil {
51-
retrievalMethod = r.SelectAttr("URI")
52-
}
53-
}
54-
5545
ret := &manifest.Encryption{
56-
// TODO: No profile? https://github.com/readium/kotlin-toolkit/blob/develop/readium/streamer/src/main/java/org/readium/r2/streamer/parser/epub/EncryptionParser.kt#L40
57-
}
58-
59-
if retrievalMethod == "license.lcpl#/encryption/content_key" {
60-
ret.Scheme = protection.SchemeLCP
46+
Scheme: scheme,
6147
}
6248

6349
if encryptionmethod := xmlquery.QuerySelector(node, xpEncMethod); encryptionmethod != nil {
@@ -74,10 +60,10 @@ func parseEncryptedData(node *xmlquery.Node) (url.URL, *manifest.Encryption) {
7460

7561
ru, err := url.FromEPUBHref(resourceURI)
7662
if err != nil {
77-
return nil, nil
63+
return "", nil
7864
}
7965

80-
return ru, ret
66+
return ru.Normalize().String(), ret
8167
}
8268

8369
func parseEncryptionProperties(encryptionProperties *xmlquery.Node) (int64, string) {

pkg/parser/epub/parser_encryption_test.go

Lines changed: 6 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -6,24 +6,19 @@ import (
66

77
"github.com/readium/go-toolkit/pkg/fetcher"
88
"github.com/readium/go-toolkit/pkg/manifest"
9+
"github.com/readium/go-toolkit/pkg/protection"
910
"github.com/readium/go-toolkit/pkg/util/url"
1011
"github.com/stretchr/testify/assert"
1112
"github.com/stretchr/testify/require"
1213
)
1314

14-
func loadEncryption(ctx context.Context, name string) (map[string]manifest.Encryption, error) {
15+
func loadEncryption(ctx context.Context, name string, scheme protection.Scheme) (map[string]manifest.Encryption, error) {
1516
n, rerr := fetcher.ReadResourceAsXML(ctx, fetcher.NewFileResource(manifest.Link{}, "./testdata/encryption/encryption-"+name+".xml"))
1617
if rerr != nil {
1718
return nil, rerr.Cause
1819
}
1920

20-
enc := ParseEncryption(n)
21-
ret := make(map[string]manifest.Encryption)
22-
for k, v := range enc {
23-
ret[k.String()] = v
24-
}
25-
26-
return ret, nil
21+
return ParseEncryption(n, scheme.URI()), nil
2722
}
2823

2924
var testEncMap = map[string]manifest.Encryption{
@@ -42,19 +37,19 @@ var testEncMap = map[string]manifest.Encryption{
4237
}
4338

4439
func TestEncryptionParserNamespacePrefixes(t *testing.T) {
45-
e, err := loadEncryption(t.Context(), "lcp-prefixes")
40+
e, err := loadEncryption(t.Context(), "lcp-prefixes", protection.LCP)
4641
require.NoError(t, err)
4742
assert.Equal(t, testEncMap, e)
4843
}
4944

5045
func TestEncryptionParserDefaultNamespaces(t *testing.T) {
51-
e, err := loadEncryption(t.Context(), "lcp-xmlns")
46+
e, err := loadEncryption(t.Context(), "lcp-xmlns", protection.LCP)
5247
require.NoError(t, err)
5348
assert.Equal(t, testEncMap, e)
5449
}
5550

5651
func TestEncryptionParserUnknownRetrievalMethod(t *testing.T) {
57-
e, err := loadEncryption(t.Context(), "unknown-method")
52+
e, err := loadEncryption(t.Context(), "unknown-method", protection.NoDRM)
5853
require.NoError(t, err)
5954
assert.Equal(t, map[string]manifest.Encryption{
6055
url.MustURLFromString("OEBPS/images/image.jpeg").String(): {

pkg/parser/epub/parser_test.go

Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,125 @@
1+
package epub
2+
3+
import (
4+
"context"
5+
"errors"
6+
"testing"
7+
8+
"github.com/readium/go-toolkit/pkg/asset"
9+
"github.com/readium/go-toolkit/pkg/fetcher"
10+
"github.com/readium/go-toolkit/pkg/mediatype"
11+
"github.com/readium/go-toolkit/pkg/protection"
12+
"github.com/stretchr/testify/assert"
13+
"github.com/stretchr/testify/require"
14+
)
15+
16+
// fakeEPUBAsset is a minimal [asset.PublicationAsset] for parser tests.
17+
// CreateFetcher is never invoked because the tests construct the fetcher
18+
// themselves and pass it to [Parser.Parse] directly.
19+
type fakeEPUBAsset struct{ name string }
20+
21+
func (a fakeEPUBAsset) Name() string { return a.name }
22+
func (a fakeEPUBAsset) MediaType(context.Context) mediatype.MediaType { return mediatype.EPUB }
23+
func (a fakeEPUBAsset) CreateFetcher(context.Context, asset.Dependencies, string) (fetcher.Fetcher, error) {
24+
return nil, errors.New("unused in tests")
25+
}
26+
27+
func openProtectionFixture(t *testing.T, file string) *fetcher.ArchiveFetcher {
28+
t.Helper()
29+
f, err := fetcher.NewArchiveFetcherFromPath(t.Context(), "../../protection/testdata/"+file)
30+
require.NoError(t, err)
31+
t.Cleanup(f.Close)
32+
return f
33+
}
34+
35+
// TestParserEndToEnd exercises [Parser.Parse] against every DRM fixture in
36+
// pkg/protection/testdata, asserting the parse succeeds and surfaces the
37+
// expected publication metadata. expectedScheme is the manifest.Encryption
38+
// scheme URI that every encrypted resource should carry — empty means either
39+
// no encryption.xml is present (so no resource should be tagged) or the
40+
// encryption is generic (no DRM scheme attached).
41+
func TestParserEndToEnd(t *testing.T) {
42+
for _, tt := range []struct {
43+
name string
44+
file string
45+
title string
46+
readingOrderSize int
47+
hasEncryptionXML bool
48+
expectedScheme string
49+
}{
50+
{"Adobe ADEPT", "fake-adept.epub", "Fake Adept DRM", 1, false, ""},
51+
{"Barnes & Noble", "fake-bn.epub", "Fake B&N DRM", 1, false, ""},
52+
{"Apple FairPlay", "fake-fairplay.epub", "Fake Fairplay DRM", 1, false, ""},
53+
{"Kobo", "fake-kobo.epub", "Fake Kobo DRM", 1, false, ""},
54+
{"Readium LCP", "fake-lcp.epub", "The Level 999 Villager Chapter 3", 35, true, protection.SchemeLCP},
55+
{"Generic encryption (Yahoo)", "yahoo.ypub", "週刊少年マガジン 2019年8号[2019年1月23日発売]", 540, true, ""},
56+
} {
57+
t.Run(tt.name, func(t *testing.T) {
58+
f := openProtectionFixture(t, tt.file)
59+
builder, err := NewParser(nil).Parse(t.Context(), fakeEPUBAsset{name: tt.file}, f)
60+
require.NoError(t, err)
61+
require.NotNil(t, builder)
62+
63+
m := builder.Manifest
64+
assert.Equal(t, tt.title, m.Metadata.Title())
65+
assert.Lenf(t, m.ReadingOrder, tt.readingOrderSize,
66+
"expected reading order size %d, got %d", tt.readingOrderSize, len(m.ReadingOrder))
67+
68+
encryptedCount := 0
69+
for _, link := range append(m.ReadingOrder, m.Resources...) {
70+
enc, ok := link.Properties["encrypted"].(map[string]interface{})
71+
if !ok {
72+
continue
73+
}
74+
encryptedCount++
75+
if tt.expectedScheme == "" {
76+
_, hasScheme := enc["scheme"]
77+
assert.Falsef(t, hasScheme, "%s should not have a scheme", link.Href.String())
78+
} else {
79+
assert.Equalf(t, tt.expectedScheme, enc["scheme"], "scheme mismatch for %s", link.Href.String())
80+
}
81+
}
82+
if tt.hasEncryptionXML {
83+
assert.Greater(t, encryptedCount, 0, "expected at least one resource to carry encryption properties")
84+
} else {
85+
assert.Zero(t, encryptedCount, "no resource should carry encryption properties")
86+
}
87+
})
88+
}
89+
}
90+
91+
// TestParseEncryptionDataScheme exercises the same two-step sequence the Parser uses:
92+
// [protection.IdentifyEPUBProtection] followed by parseEncryptionData, and
93+
// verifies the detected scheme reaches every encryption.xml entry.
94+
func TestParseEncryptionDataScheme(t *testing.T) {
95+
for _, tt := range []struct {
96+
name string
97+
file string
98+
expectScheme string
99+
expectEntries bool
100+
}{
101+
{"Readium LCP", "fake-lcp.epub", protection.SchemeLCP, true},
102+
{"Generic encryption (Yahoo)", "yahoo.ypub", "", true},
103+
// EPUBs without META-INF/encryption.xml yield no entries at all.
104+
{"Adobe ADEPT (no encryption.xml)", "fake-adept.epub", "", false},
105+
{"Kobo (no encryption.xml)", "fake-kobo.epub", "", false},
106+
} {
107+
t.Run(tt.name, func(t *testing.T) {
108+
f := openProtectionFixture(t, tt.file)
109+
110+
scheme, err := protection.IdentifyEPUBProtection(t.Context(), f)
111+
require.NoError(t, err)
112+
113+
enc, err := parseEncryptionData(t.Context(), f, scheme.URI())
114+
require.NoError(t, err)
115+
if !tt.expectEntries {
116+
assert.Empty(t, enc)
117+
return
118+
}
119+
require.NotEmpty(t, enc)
120+
for u, e := range enc {
121+
assert.Equalf(t, tt.expectScheme, e.Scheme, "scheme mismatch for %s", u)
122+
}
123+
})
124+
}
125+
}

pkg/protection/drm.go

Lines changed: 0 additions & 14 deletions
This file was deleted.

0 commit comments

Comments
 (0)