-
Notifications
You must be signed in to change notification settings - Fork 62
Expand file tree
/
Copy pathisbn_ia_to_13.py
More file actions
70 lines (56 loc) · 2.27 KB
/
isbn_ia_to_13.py
File metadata and controls
70 lines (56 loc) · 2.27 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
"""
IA isbn ref to isbn 13
NOTE: This script ideally works on an Open Library Dump that only contains editions with an IA isbn ref (an "ia:isbn_<13 digits>" source record) and no isbn_13
"""
import gzip
import json
import re
import isbnlib
import olclient
class ConvertISBNiato13Job(olclient.AbstractBotJob):
    """Bot job that fills in ``isbn_13`` from an ``ia:isbn_<13 digits>`` source record.

    The job reads the gzipped, tab-separated dump named by ``self.args.file``.
    For every edition row that has no ``isbn_13`` but has a source record of
    the form ``ia:isbn_`` followed by exactly 13 digits, it fetches the
    edition through ``self.ol``, sets ``isbn_13`` to that number and saves it.
    """

    def run(self) -> None:
        """Looks for any IA ISBN to convert to 13

        Rows are skipped (no edit is made) when any of the following holds:

        * the row's JSON is not of type ``/type/edition``;
        * the row's JSON already contains an ``isbn_13`` key;
        * ``source_records`` is missing or empty;
        * no source record fully matches ``ia:isbn_[0-9]{13}``;
        * the extracted number is rejected by ``isbnlib.is_isbn13``;
        * the fetched edition is not of type ``/type/edition`` or already
          has an ``isbn_13`` attribute.
        """
        self.write_changes_declaration()
        # Column positions within one tab-separated dump row; only the JSON
        # column is actually read below.
        header = {"type": 0, "key": 1, "revision": 2, "last_modified": 3, "JSON": 4}
        comment = "extract ISBN 13 from IA source_record"
        ia_prefix = "ia:isbn_"
        # Compiled once, outside the row loop, since the pattern never changes.
        ia_isbn_13_re = re.compile(r"ia:isbn_[0-9]{13}")
        with gzip.open(self.args.file, "rb") as fin:
            for raw_row in fin:
                row = raw_row.decode().split("\t")
                _json = json.loads(row[header["JSON"]])
                if _json["type"]["key"] != "/type/edition":
                    continue
                # _json is a plain dict, so this must be a key-membership
                # test; hasattr() on a dict never sees its keys.
                if "isbn_13" in _json:
                    # we only update editions with no existing isbn 13s (for now at least)
                    continue
                source_records = _json.get("source_records")
                if not source_records:
                    continue
                # First source record that is exactly "ia:isbn_" + 13 digits.
                ia_record = next(
                    (rec for rec in source_records if ia_isbn_13_re.fullmatch(rec)),
                    None,
                )
                if ia_record is None:
                    continue
                # Everything after the "ia:isbn_" prefix is the 13-digit number.
                isbn_13 = ia_record[len(ia_prefix):]
                if not isbnlib.is_isbn13(isbn_13):
                    continue
                olid = _json["key"].split("/")[-1]
                edition = self.ol.Edition.get(olid)
                # NOTE(review): these two checks repeat the dump-level filters
                # on the fetched edition, presumably because the dump can be
                # older than the live record - confirm.
                if edition.type["key"] != "/type/edition":
                    continue
                if hasattr(edition, "isbn_13"):
                    # don't update editions that already have an isbn 13
                    continue
                isbns_13 = [isbn_13]
                edition.isbn_13 = isbns_13
                self.logger.info("\t".join([olid, ia_record, str(isbns_13)]))
                self.save(lambda: edition.save(comment=comment))
if __name__ == "__main__":
    # Script entry point: build the bot job and run it once.
    bot_job = ConvertISBNiato13Job()
    try:
        bot_job.run()
    except Exception as err:
        # Record the failure (with traceback) in the job's own log, then
        # propagate the same exception to the caller.
        bot_job.logger.exception(err)
        raise err