-
Notifications
You must be signed in to change notification settings - Fork 9
Expand file tree
/
Copy pathindex.js
More file actions
287 lines (258 loc) · 9.27 KB
/
index.js
File metadata and controls
287 lines (258 loc) · 9.27 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
import prompt from 'prompt-sync';
import fetch from 'node-fetch';
import { PDFDocument } from 'pdf-lib';
import fs from 'fs';
import sanitize from 'sanitize-filename';
import yargs from 'yargs';
import { hideBin } from 'yargs/helpers';
import md5 from 'md5';
import { spawn } from 'child_process';
import path from 'path';
import pLimit from 'p-limit';
import { fetchEncryptionKey, decryptFile } from './src/crypto.js';
import { getUserInfo, getBooks, getBookInfo, getBookResources } from './src/api.js';
// Synchronous prompt used for any value not supplied on the command line.
// `sigint: true` lets Ctrl+C abort the program while a prompt is waiting.
const promptSync = prompt({ sigint: true });

// Command-line options. Options with a `null` default are asked for
// interactively by the main routine when they are left unset.
const argv = yargs(hideBin(process.argv))
    .option('site', {
        describe: 'The site to download from, currently either bsmart or digibook24',
        type: 'string',
        default: null
    })
    .option('siteUrl', {
        describe: 'This overwrites the base url for the site, useful in case a new platform is added',
        type: 'string',
        default: null
    })
    .option('cookie', {
        describe: 'Input "_bsw_session_v1_production" cookie',
        type: 'string',
        default: null
    })
    .option('bookId', {
        describe: 'Book id',
        type: 'string',
        default: null
    })
    .option('downloadOnly', {
        describe: 'Downloads the pages as individual pdfs and will provide a command that can be used to merge them with pdftk',
        type: 'boolean',
        default: false
    })
    .option('key', {
        // The main routine validates this against /^[\da-f]{32}$/, i.e. 32 hex
        // characters, so the help text states characters rather than bytes.
        describe: 'Manually provide encryption key as 32 hex characters',
        type: 'string',
    })
    .option('pdftk', {
        describe: 'Downloads the pages as individual pdfs and merges them with pdftk',
        type: 'boolean',
        default: false
    })
    .option('pdftkPath', {
        describe: 'Path to pdftk executable',
        type: 'string',
        default: 'pdftk'
    })
    .option('checkMd5', {
        describe: 'Checks the md5 hash of the downloaded pages',
        type: 'boolean',
        default: false
    })
    .option('output', {
        describe: 'Output filename',
        type: 'string',
        default: null
    })
    .option('resources', {
        describe: 'Download resources of the book instead of the book itself',
        type: 'boolean',
        default: false
    })
    .option('concurrency', {
        describe: 'Number of parallel downloads',
        type: 'number',
        default: 4
    })
    .help()
    .argv;
// Sites selectable by name with --site, mapped to the host used for API calls.
const KNOWN_SITES = {
    bsmart: 'www.bsmart.it',
    digibook24: 'web.digibook24.com',
};

/**
 * Determines the host to talk to.
 *
 * `--siteUrl` wins when given. Otherwise `--site` is used if it names a known
 * site; an unknown or missing value falls back to prompting until a known
 * site name is entered.
 *
 * @returns {string} The base site host.
 */
function resolveBaseSite() {
    if (argv.siteUrl) {
        return argv.siteUrl;
    }
    const isKnown = (name) => Object.keys(KNOWN_SITES).includes(name);
    let platform = argv.site;
    // Reject an unknown --site explicitly instead of silently picking a default.
    if (platform && !isKnown(platform)) {
        console.log('Invalid site');
        platform = null;
    }
    while (!platform) {
        platform = promptSync('Input site (bsmart or digibook24):');
        if (!isKnown(platform)) {
            platform = null;
            console.log('Invalid site');
        }
    }
    return KNOWN_SITES[platform];
}

/**
 * Downloads a single asset, decrypts it unless it is flagged as unencrypted,
 * and optionally reports an md5 mismatch.
 *
 * @param {object} asset - Asset descriptor; `url` and `encrypted` are read here.
 * @param {number} index - Position of the asset, used only in log messages.
 * @param {Buffer} encryptionKey - Key handed to `decryptFile`.
 * @returns {Promise<Buffer>} The (decrypted) file contents.
 * @throws {Error} When the HTTP response is not successful, or when fetching
 *   or decrypting fails.
 */
async function downloadAsset(asset, index, encryptionKey) {
    const response = await fetch(asset.url);
    // Without this check an error page would be decrypted and saved as a page.
    if (!response.ok) {
        throw new Error(`HTTP ${response.status} ${response.statusText}`);
    }
    let data = Buffer.from(await response.arrayBuffer());
    if (asset.encrypted !== false) {
        data = await decryptFile(data, encryptionKey);
    }
    // NOTE(review): assumes the API exposes the expected hash as `asset.md5`
    // and that it covers the decrypted payload - confirm against the API.
    // The previous comparison against `asset.url` could never match.
    if (argv.checkMd5 && md5(data) !== asset.md5) {
        console.log(`\nMismatching md5 hash for asset ${index}: ${asset.url}`);
    }
    return data;
}

/**
 * Runs pdftk, echoing its output, and resolves once the process has closed.
 *
 * @param {string} executable - Path or name of the pdftk executable.
 * @param {string[]} args - Arguments passed to pdftk.
 * @returns {Promise<number|null>} The exit code reported by the 'close' event.
 * @throws {Error} When the process cannot be spawned (e.g. pdftk not found).
 */
function runPdftk(executable, args) {
    return new Promise((resolve, reject) => {
        const child = spawn(executable, args);
        child.stdout.on('data', (chunk) => {
            console.log(`stdout: ${chunk}`);
        });
        child.stderr.on('data', (chunk) => {
            console.log(`stderr: ${chunk}`);
        });
        // Without an 'error' listener a failed spawn would crash the process.
        child.on('error', reject);
        child.on('close', resolve);
    });
}

/**
 * Entry point: validates the options, authenticates, lets the user pick a
 * book, downloads its pages or resources and writes them out according to
 * the selected mode (merged pdf, individual pdfs, pdftk merge, or resources).
 *
 * @returns {Promise<void>}
 */
async function main() {
    if (argv.downloadOnly && argv.pdftk) {
        console.log("Can't use --download-only and --pdftk at the same time");
        return;
    }

    let keyHex = null;
    if (argv.key) {
        keyHex = argv.key.replaceAll(" ", "").toLowerCase();
        if (!(/^([\da-f]{32})$/.test(keyHex))) {
            console.log("Bad encryption key provided, must be 32 hex characters.");
            return;
        }
    }

    // pLimit throws on anything else; fail early with a readable message.
    const concurrency = argv.concurrency;
    const concurrencyIsValid =
        (Number.isInteger(concurrency) || concurrency === Number.POSITIVE_INFINITY) && concurrency > 0;
    if (!concurrencyIsValid) {
        console.log("Invalid --concurrency, must be a positive integer");
        return;
    }

    // Both modes write one pdf per page into ./temp before (optionally) merging.
    const usesTempFiles = argv.downloadOnly || argv.pdftk;
    if (usesTempFiles) {
        if (!fs.existsSync('temp')) {
            fs.mkdirSync('temp');
        }
        if (fs.readdirSync('temp').length > 0) {
            console.log("Files already in temp folder, please manually delete them if you want to download a new book");
            return;
        }
    }

    const baseSite = resolveBaseSite();

    let cookie = argv.cookie;
    while (!cookie) {
        cookie = promptSync('Input "_bsw_session_v1_production" cookie:');
    }

    // Get user info with cookie to obtain auth_token
    let user;
    try {
        const cookieHeaders = { cookie: '_bsw_session_v1_production=' + cookie };
        user = await getUserInfo(baseSite, cookieHeaders);
    } catch (error) {
        console.log("Error fetching user info:", error);
        return;
    }

    // Create headers object with auth_token for all subsequent API calls
    const headers = { "auth_token": user.auth_token };

    // Get books list
    let books;
    try {
        books = await getBooks(baseSite, headers);
    } catch (error) {
        console.log("Error fetching books:", error);
        return;
    }

    if (books.length === 0) {
        console.log('No books in your library!');
    } else {
        console.log("Book list:");
        console.table(books.map(({ id, title }) => ({ id, title })));
    }

    let bookId = argv.bookId;
    while (!bookId) {
        bookId = promptSync(`Please input book id${(books.length === 0 ? " manually" : "")}:`);
    }

    console.log(`Fetching book info`);

    // Get book info
    let book;
    try {
        book = await getBookInfo(baseSite, bookId, headers);
    } catch (error) {
        console.log("Error fetching book info:", error.message);
        return;
    }

    // Get book resources
    let info;
    try {
        info = await getBookResources(baseSite, book, headers);
    } catch (error) {
        console.log("Error fetching book resources:", error);
        return;
    }

    const outputname = argv.output || sanitize(book.id + " - " + book.title);

    let encryptionKey;
    if (keyHex) {
        console.log('Using provided encryption key');
        encryptionKey = Buffer.from(keyHex, "hex");
    } else {
        try {
            console.log('Fetching encryption key');
            encryptionKey = await fetchEncryptionKey();
        } catch (error) {
            console.log("Error fetching encryption key:", error);
            return;
        }
    }

    // Resources are the "launch_file" assets, book pages the "page_pdf" ones.
    const wantedUse = argv.resources ? "launch_file" : "page_pdf";
    const assets = info.flatMap((entry) => entry.assets).filter((asset) => asset.use === wantedUse);
    if (assets.length === 0) {
        // Nothing to merge or save; avoid writing an empty pdf or running pdftk without inputs.
        console.log(`No "${wantedUse}" assets found for this book`);
        return;
    }

    if (argv.resources) {
        if (!fs.existsSync(outputname)) {
            fs.mkdirSync(outputname);
        }
        console.log("Downloading resources");
    } else {
        console.log("Downloading pages");
    }

    // Set up concurrency limit
    const limit = pLimit(concurrency);

    // Create download tasks with concurrency control
    const downloadTasks = assets.map((asset, i) =>
        limit(async () => {
            try {
                return await downloadAsset(asset, i, encryptionKey);
            } catch (e) {
                console.log(`\nError downloading asset ${i}: ${e.message}`);
                throw e;
            }
        })
    );
    // Failures are logged inside each task and the first one aborts the run
    // below; mark every task as handled so later failures of tasks that are
    // never awaited do not surface as unhandled rejections.
    for (const task of downloadTasks) {
        task.catch(() => {});
    }

    const mergeInProcess = !argv.resources && !usesTempFiles;
    const outputPdf = mergeInProcess ? await PDFDocument.create() : null;
    const filenames = [];

    // Process results in order to maintain page order
    for (let i = 0; i < assets.length; i++) {
        const asset = assets[i];
        let data;
        try {
            data = await downloadTasks[i];
        } catch (e) {
            // Error already logged in the task; drop downloads that have not started yet.
            limit.clearQueue();
            return;
        }

        process.stdout.write(`\rProgress ${((i + 1) / assets.length * 100).toFixed(2)}% (${i + 1}/${assets.length})`);

        if (argv.resources) {
            // basename() keeps a server-supplied name from escaping the output folder.
            const filename = path.basename(asset.filename);
            await fs.promises.writeFile(`${outputname}/${filename}`, data);
        } else if (usesTempFiles) {
            const filename = path.basename(asset.filename, '.pdf');
            const filePath = `temp/${filename}.pdf`;
            await fs.promises.writeFile(filePath, data);
            filenames.push(filePath);
        } else {
            // Only the first page of each downloaded pdf is copied into the output.
            const page = await PDFDocument.load(data);
            const [firstDonorPage] = await outputPdf.copyPages(page, [0]);
            outputPdf.addPage(firstDonorPage);
        }
    }

    console.log(); // New line after progress bar

    if (mergeInProcess) {
        await fs.promises.writeFile(outputname + ".pdf", await outputPdf.save());
    } else if (!argv.resources) {
        // Quote each input so the printed command survives file names with spaces.
        const quotedInputs = filenames.map((name) => `"${name}"`).join(' ');
        const pdftkCommand = `${argv.pdftkPath} ${quotedInputs} cat output "${outputname}.pdf"`;
        console.log("Run this command to merge the pages with pdftk:");
        console.log(pdftkCommand);

        if (argv.pdftk) {
            console.log("Merging pages with pdftk");
            try {
                // Wait for pdftk so "Done" is only printed once the merge has finished.
                const code = await runPdftk(argv.pdftkPath, [...filenames, 'cat', 'output', outputname + ".pdf"]);
                console.log(`child process exited with code ${code}`);
            } catch (error) {
                console.log("Error running pdftk:", error.message);
                return;
            }
        }
    }

    console.log("Done");
}

// Surface anything unexpected instead of leaving an unhandled rejection.
main().catch((error) => {
    console.log("Unexpected error:", error);
    process.exitCode = 1;
});