Skip to content

Commit 44412f2

Browse files
committed
Add ISBN 10 to 13 converter bot
1 parent cbeb30b commit 44412f2

File tree

4 files changed

+112
-0
lines changed

4 files changed

+112
-0
lines changed

isbn10to13bot/README.md

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
A set of scripts to add isbn_13 values to editions with valid isbn_10.
2+
### How To Use
3+
```bash
4+
# Find Editions with ISBN 10, but no ISBN 13
5+
./find_editions_with_isbn10not13.sh /path/to/ol_dump.txt.gz /path/to/filtered_dump.txt.gz
6+
# Add ISBN 13s converted from the ISBN 10
7+
python isbn_10_to_13.py --dump_path=/path/to/filtered_dump.txt.gz --dry_run=<bool> --limit=<int>
8+
```
9+
If `dry_run` is True, the script will run as normal, but no changes will be saved to OpenLibrary.
10+
This is for debugging purposes. By default, `dry_run` is `True`.
11+
`limit` is the maximum number of changes to OpenLibrary that will occur before the script quits.
12+
By default, `limit` is set to `1`. Setting `limit` to `0` allows unlimited edits.
13+
A log is automatically generated whenever `isbn_10_to_13.py` executes.
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
#!/bin/bash
# Filter an Open Library dump down to the edition rows that contain an
# "isbn_10" field but no "isbn_13" field.
#
# Arguments:
#   $1  path to the gzipped dump to read
#   $2  path of the gzipped file the matching rows are written to
#
# Exits with status 1 if either argument is missing.

if [[ -z $1 ]]
then
  echo "No dump file provided"
  exit 1
fi
if [[ -z $2 ]]
then
  echo "No output file provided"
  exit 1
fi

OL_DUMP=$1
OUTPUT=$2

# Both paths are quoted so that file names containing spaces or glob
# characters are passed through as a single argument. The pattern is quoted
# for the same reason. pv sits in the pipeline only to show progress.
zgrep '^/type/edition' "$OL_DUMP" | grep -E '"isbn_10":' | grep -v -E '"isbn_13":' | pv | gzip > "$OUTPUT"

isbn10to13bot/isbn_10_to_13.py

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
"""
2+
isbn 10 to isbn 13
3+
NOTE: This script ideally works on an Open Library Dump that only contains editions with an isbn_10 and no isbn_13
4+
"""
5+
import gzip
6+
import json
7+
8+
import isbnlib
9+
import olclient
10+
11+
12+
class ConvertISBN10to13Job(olclient.AbstractBotJob):
    """Bot job that adds ``isbn_13`` values to editions that only have ``isbn_10``."""

    def run(self) -> None:
        """Looks for any ISBN 10s to convert to 13

        Streams the gzipped, tab-separated dump at ``self.args.file`` row by
        row. For every edition row that has an ``isbn_10`` and no ``isbn_13``
        the ISBN 10 values are converted with isbnlib; the edition is then
        fetched through ``self.ol`` and, if the fetched record is still an
        edition without ``isbn_13``, the converted list is set on it and the
        save is handed to ``self.save``.

        Rows whose ISBN 10 values produce no valid ISBN 13 are skipped before
        the edition is fetched, so no empty ``isbn_13`` list is ever written.

        NOTE(review): ``self.args``, ``self.ol``, ``self.logger``,
        ``self.save`` and ``self.write_changes_declaration`` come from
        ``olclient.AbstractBotJob``; presumably ``self.save`` is what applies
        the dry-run and limit settings -- confirm against olclient.
        """
        self.write_changes_declaration()
        # Column layout of a dump row; only the JSON column is read here.
        header = {"type": 0, "key": 1, "revision": 2, "last_modified": 3, "JSON": 4}
        comment = "convert ISBN 10 to 13 using isbnlib"
        with gzip.open(self.args.file, "rb") as fin:
            for row in fin:
                row = row.decode().split("\t")
                _json = json.loads(row[header["JSON"]])
                if _json["type"]["key"] != "/type/edition":
                    continue

                # we only update editions with existing isbn 10s
                isbns_10 = _json.get("isbn_10")
                if not isbns_10:
                    continue

                if "isbn_13" in _json:
                    # we only update editions with no existing isbn 13s (for now at least)
                    continue

                isbns_13 = []
                for isbn in isbns_10:
                    canonical = isbnlib.canonical(isbn)
                    if isbnlib.is_isbn10(canonical):
                        isbn_13 = isbnlib.to_isbn13(canonical)
                    elif isbnlib.is_isbn13(canonical):
                        # an ISBN 13 that was stored in the isbn_10 field
                        isbn_13 = canonical
                    else:
                        # The explicit reset keeps an unrecognised value from
                        # inheriting the result of the previous ISBN.
                        isbn_13 = None
                    if isbn_13:
                        isbns_13.append(isbn_13)

                # remove duplicates, shouldn't normally be necessary
                isbns_13 = dedupe(isbns_13)
                if not isbns_13:
                    # nothing valid to add, so there is no reason to fetch or save
                    continue

                olid = _json["key"].split("/")[-1]
                edition = self.ol.Edition.get(olid)
                # The dump may be stale, so both checks are repeated on the
                # fetched record.
                if edition.type["key"] != "/type/edition":
                    continue

                if hasattr(edition, "isbn_13"):
                    # we only update editions with no existing isbn 13s (for now at least)
                    continue

                edition.isbn_13 = isbns_13
                self.logger.info("\t".join([olid, str(isbns_10), str(isbns_13)]))
                self.save(lambda: edition.save(comment=comment))
62+
63+
64+
def dedupe(input_list: list) -> list:
    """Return a new list with the first occurrence of each element, in order.

    Membership is tested by equality against the elements already kept, so
    unhashable elements are accepted. The input list is not modified.
    """
    unique: list = []
    for item in input_list:
        if item in unique:
            continue
        unique.append(item)
    return unique
71+
72+
73+
if __name__ == "__main__":
    job = ConvertISBN10to13Job()

    try:
        job.run()
    except Exception as e:
        # Record the failure in the job's log, then let it propagate so the
        # process still terminates with the error. A bare ``raise`` re-raises
        # the active exception without adding another traceback frame.
        job.logger.exception(e)
        raise

isbn10to13bot/requirements.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
openlibrary-client==0.0.30
2+
isbnlib==3.10.14

0 commit comments

Comments
 (0)