Skip to content

Commit 3016f2c

Browse files
committed
Add ISBN 10 to 13 converter bot
1 parent cbeb30b commit 3016f2c

File tree

4 files changed

+107
-0
lines changed

4 files changed

+107
-0
lines changed

isbn10to13bot/README.md

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
A set of scripts to add isbn_13 values to editions with valid isbn_10.
2+
### How To Use
3+
```bash
4+
# Find Editions with ISBN 10, but no ISBN 13
5+
./find_editions_with_isbn10not13.sh /path/to/ol_dump.txt.gz /path/to/filtered_dump.txt.gz
6+
# Add ISBN 13s converted from the ISBN 10
7+
python isbn_10_to_13.py --dump_path=/path/to/filtered_dump.txt.gz --dry_run=<bool> --limit=<int>
8+
```
9+
If `dry_run` is True, the script will run as normal, but no changes will be saved to OpenLibrary.
10+
This is for debugging purposes. By default, `dry_run` is `True`.
11+
`limit` is the maximum number of changes to OpenLibrary that will occur before the script quits.
12+
By default, `limit` is set to `1`. Setting `limit` to `0` allows unlimited edits.
13+
A log is automatically generated whenever `isbn_10_to_13.py` executes.
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
#!/bin/bash
#
# Filter an Open Library dump down to edition rows that contain an "isbn_10"
# field but no "isbn_13" field, and write the result as a gzip file.
#
# Usage: ./find_editions_with_isbn10not13.sh <ol_dump.txt.gz> <output.txt.gz>
#
# Requires zgrep, grep, pv and gzip on PATH.

if [[ -z "$1" ]]
then
    echo "No dump file provided"
    exit 1
fi
if [[ -z "$2" ]]
then
    echo "No output file provided"
    exit 1
fi

OL_DUMP="$1"
OUTPUT="$2"

# Quote the pattern and both paths so that spaces or glob characters in a
# file name are not word-split or expanded by the shell.
# pv only reports throughput on the terminal; it passes the data through unchanged.
zgrep '^/type/edition' "$OL_DUMP" | grep -E '"isbn_10":' | grep -v -E '"isbn_13":' | pv | gzip > "$OUTPUT"

isbn10to13bot/isbn_10_to_13.py

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
"""
2+
isbn 10 to isbn 13
3+
NOTE: This script ideally works on an Open Library Dump that only contains editions with an isbn_10 and no isbn_13
4+
"""
5+
import gzip
6+
import json
7+
8+
import isbnlib
9+
import olclient
10+
11+
12+
class ConvertISBN10to13Job(olclient.AbstractBotJob):
    """Bot job that adds ``isbn_13`` values derived from an edition's ``isbn_10``.

    The job reads a gzipped, tab-separated Open Library dump from
    ``self.args.file`` and, for every edition row that has ``isbn_10`` but no
    ``isbn_13``, fetches the edition, converts its ISBNs with ``isbnlib`` and
    saves the result.

    NOTE(review): ``self.args``, ``self.ol``, ``self.logger``, ``self.save`` and
    ``self.write_changes_declaration`` are not defined here; they are assumed
    to be provided by ``olclient.AbstractBotJob`` -- confirm against olclient.
    """

    @staticmethod
    def _to_isbns_13(isbns_10) -> list:
        """Return the de-duplicated ISBN 13s derived from ``isbns_10``.

        Values that are neither a valid ISBN 10 nor a valid ISBN 13 are
        dropped. The result keeps the order of first appearance.
        """
        isbns_13 = []
        for isbn in isbns_10:
            canonical = isbnlib.canonical(isbn)
            if isbnlib.is_isbn10(canonical):
                isbn_13 = isbnlib.to_isbn13(canonical)
            elif isbnlib.is_isbn13(canonical):
                # an ISBN 13 that was stored in the isbn_10 field
                isbn_13 = canonical
            else:
                # Skip explicitly: without this branch an invalid ISBN would
                # reuse the value left over from the previous iteration (or
                # raise NameError if it came first).
                continue
            if isbn_13:
                isbns_13.append(isbn_13)
        return dedupe(isbns_13)  # remove duplicates, shouldn't normally be necessary

    def run(self) -> None:
        """Looks for any ISBN 10s to convert to 13"""
        self.write_changes_declaration()
        # column positions within one tab-separated dump row
        header = {"type": 0, "key": 1, "revision": 2, "last_modified": 3, "JSON": 4}
        comment = "convert ISBN 10 to 13 using isbnlib"
        with gzip.open(self.args.file, "rb") as fin:
            for row in fin:
                row = row.decode().split("\t")
                _json = json.loads(row[header["JSON"]])
                if _json["type"]["key"] != "/type/edition":
                    continue

                # we only update editions with existing isbn 10s
                isbns_10 = _json.get("isbn_10")
                if not isbns_10:
                    continue
                if "isbn_13" in _json:
                    # we only update editions with no existing isbn 13s (for now at least)
                    continue

                olid = _json["key"].split("/")[-1]
                edition = self.ol.Edition.get(olid)
                # The dump can be stale, so re-check the fetched record.
                if edition.type["key"] != "/type/edition":
                    continue
                if getattr(edition, "isbn_13", None):
                    # The fetched edition already has ISBN 13s (added after the
                    # dump was taken); do not overwrite them.
                    continue

                isbns_13 = self._to_isbns_13(isbns_10)
                if not isbns_13:
                    # Nothing valid to add; avoid saving an empty isbn_13 list.
                    continue

                setattr(edition, 'isbn_13', isbns_13)
                self.logger.info(
                    "\t".join([olid, str(isbns_10), str(isbns_13)])
                )
                self.save(lambda: edition.save(comment=comment))
58+
59+
def dedupe(input_list: list) -> list:
    """Remove duplicate elements in a list and return the new list.

    The first occurrence of each element is kept and the original order is
    preserved. ``input_list`` itself is not modified.

    Args:
        input_list: The list to de-duplicate.

    Returns:
        A new list containing each distinct element once.
    """
    try:
        # dict keys are unique and keep insertion order, giving an O(n) pass.
        return list(dict.fromkeys(input_list))
    except TypeError:
        # Unhashable elements (e.g. lists) cannot be dict keys; fall back to
        # the equality-based scan.
        output = []
        for item in input_list:
            if item not in output:
                output.append(item)
        return output
66+
67+
68+
if __name__ == "__main__":
    # Script entry point: build the job and run it.
    job = ConvertISBN10to13Job()

    try:
        job.run()
    except Exception as e:
        # Record the failure with its traceback in the job log, then re-raise
        # so the process still exits with an error.
        job.logger.exception(e)
        raise

isbn10to13bot/requirements.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
openlibrary-client==0.0.30
2+
isbnlib==3.10.14

0 commit comments

Comments
 (0)