|
| 1 | +""" |
| 2 | +isbn 10 to isbn 13 |
| 3 | +NOTE: This script ideally works on an Open Library Dump that only contains editions with an isbn_10 and no isbn_13 |
| 4 | +""" |
| 5 | +import gzip |
| 6 | +import json |
| 7 | + |
| 8 | +import isbnlib |
| 9 | +import olclient |
| 10 | + |
| 11 | + |
class ConvertISBN10to13Job(olclient.AbstractBotJob):
    """Bot job that adds ``isbn_13`` values to editions that only have ``isbn_10``.

    The job reads the gzipped, tab-separated dump named by ``self.args.file``
    and, for every edition row that has an ``isbn_10`` but no ``isbn_13``,
    fetches the live edition, derives the ISBN 13s with ``isbnlib`` and saves
    the edition.
    """

    @staticmethod
    def _convert_isbns(isbns_10) -> list:
        """Return the ISBN 13 equivalents of the given ISBN 10 values.

        Values that are already ISBN 13s are passed through in canonical form.
        Values that are neither a valid ISBN 10 nor a valid ISBN 13 are
        dropped. Duplicates are removed while preserving order.
        """
        isbns_13 = []
        for isbn in isbns_10:
            canonical = isbnlib.canonical(isbn)
            # Decide the result afresh for every input so that an invalid
            # value can never reuse the ISBN 13 computed for a previous one.
            if isbnlib.is_isbn10(canonical):
                isbn_13 = isbnlib.to_isbn13(canonical)
            elif isbnlib.is_isbn13(canonical):
                isbn_13 = canonical
            else:
                isbn_13 = None
            if isbn_13:
                isbns_13.append(isbn_13)

        if len(isbns_13) > 1:
            # remove duplicates, shouldn't normally be necessary
            isbns_13 = dedupe(isbns_13)
        return isbns_13

    def run(self) -> None:
        """Looks for any ISBN 10s to convert to 13"""
        self.write_changes_declaration()
        # Column positions of the tab-separated dump rows.
        header = {"type": 0, "key": 1, "revision": 2, "last_modified": 3, "JSON": 4}
        comment = "convert ISBN 10 to 13 using isbnlib"
        with gzip.open(self.args.file, "rb") as fin:
            for raw_row in fin:
                row = raw_row.decode().split("\t")
                _json = json.loads(row[header["JSON"]])
                if _json["type"]["key"] != "/type/edition":
                    continue

                # we only update editions with existing isbn 10s
                isbns_10 = _json.get("isbn_10")
                if not isbns_10:
                    continue

                if "isbn_13" in _json:
                    # we only update editions with no existing isbn 13s (for now at least)
                    continue

                olid = _json["key"].split("/")[-1]
                # The dump may be stale, so re-check the live record before editing.
                edition = self.ol.Edition.get(olid)
                if edition.type["key"] != "/type/edition":
                    continue

                if hasattr(edition, "isbn_13"):
                    # we only update editions with no existing isbn 13s (for now at least)
                    continue

                isbns_13 = self._convert_isbns(isbns_10)
                if not isbns_13:
                    # Nothing valid could be derived; do not write an empty
                    # isbn_13 list to the edition.
                    continue

                setattr(edition, "isbn_13", isbns_13)
                self.logger.info("\t".join([olid, str(isbns_10), str(isbns_13)]))
                self.save(lambda: edition.save(comment=comment))
| 62 | + |
| 63 | + |
def dedupe(input_list: list) -> list:
    """Return a new list holding the first occurrence of each element.

    The original order is preserved and the input list is not modified.
    Membership is checked by equality, so unhashable elements are supported.
    """
    unique = []
    for element in input_list:
        if element in unique:
            # Already kept an equal element earlier; skip this repeat.
            continue
        unique.append(element)
    return unique
| 71 | + |
| 72 | + |
if __name__ == "__main__":
    # Script entry point: build the job and run it once.
    job = ConvertISBN10to13Job()

    try:
        job.run()
    except Exception as e:
        # Record the failure, with traceback, in the job's log before aborting.
        job.logger.exception(e)
        # A bare ``raise`` re-raises the active exception unchanged, so the
        # process still terminates with the original error.
        raise
0 commit comments