Skip to content

Commit 6fa2c1a

Browse files
committed
Update import API to use key/remote_ids instead of ol_id/identifiers to match type schema
1 parent 37f173e commit 6fa2c1a

File tree

3 files changed

+16
-29
lines changed

3 files changed

+16
-29
lines changed

openlibrary/catalog/add_book/load_book.py

Lines changed: 7 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -161,20 +161,16 @@ def get_redirected_authors(authors: list["Author"]):
161161
return authors
162162

163163
# Look for an exact match on the Open Library key first.
164-
if (key := author.get("ol_id")) and (
165-
reply := list(
166-
web.ctx.site.things({"type": "/type/author", "key~": f'/authors/{key}'})
167-
)
168-
):
164+
if (key := author.get("key")) and (record := web.ctx.site.get(key)):
169165
# Always match on OL ID, even if remote identifiers don't match.
170-
return get_redirected_authors(list(web.ctx.site.get_many(reply)))
166+
return get_redirected_authors([record])
171167

172168
# Try other identifiers next.
173-
if identifiers := author.get("identifiers"):
169+
if remote_ids := author.get("remote_ids"):
174170
queries = []
175171
matched_authors = []
176172
# Get all the authors that match any incoming identifier.
177-
for identifier, val in identifiers.items():
173+
for identifier, val in remote_ids.items():
178174
queries.append({"type": "/type/author", f"remote_ids.{identifier}": val})
179175
for query in queries:
180176
if reply := list(web.ctx.site.things(query)):
@@ -186,7 +182,7 @@ def get_redirected_authors(authors: list["Author"]):
186182
highest_matches = 0
187183
selected_match = None
188184
for a in matched_authors:
189-
_, matches = a.merge_remote_ids(identifiers)
185+
_, matches = a.merge_remote_ids(remote_ids)
190186
if matches > highest_matches:
191187
selected_match = a
192188
highest_matches = matches
@@ -253,9 +249,9 @@ def find_entity(author: dict[str, Any]) -> "Author | None":
253249
"""
254250
assert isinstance(author, dict)
255251
things = find_author(author)
256-
if "identifiers" in author:
252+
if "remote_ids" in author:
257253
for index, t in enumerate(things):
258-
t.remote_ids, _ = t.merge_remote_ids(author["identifiers"])
254+
t.remote_ids, _ = t.merge_remote_ids(author["remote_ids"])
259255
things[index] = t
260256
return things[0] if things else None
261257

@@ -316,9 +312,6 @@ def import_author(author: dict[str, Any], eastern=False) -> "Author | dict[str,
316312
):
317313
if f in author:
318314
a[f] = author[f]
319-
# Import record hitting endpoint should list external IDs under "identifiers", but needs to be "remote_ids" when going into the DB.
320-
if "identifiers" in author:
321-
a["remote_ids"] = author["identifiers"]
322315
return a
323316

324317

openlibrary/catalog/add_book/tests/test_load_book.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -165,8 +165,8 @@ def test_first_match_ol_key(self, mock_site):
165165
# Ideally, two different authors should never share the same VIAF, but this case demonstrates that the OL key takes priority.
166166
searched_author = {
167167
"name": "William H. Brewer",
168-
"ol_id": "OL4A",
169-
"identifiers": {"viaf": "12345678"},
168+
"key": "/authors/OL4A",
169+
"remote_ids": {"viaf": "12345678"},
170170
}
171171
found = import_author(searched_author)
172172
assert found.key == author_different_key["key"]
@@ -198,7 +198,7 @@ def test_second_match_remote_identifier(self, mock_site):
198198
# Look for exact match on VIAF, regardless of name field.
199199
searched_author = {
200200
"name": "William Brewer",
201-
"identifiers": {"viaf": "12345678"},
201+
"remote_ids": {"viaf": "12345678"},
202202
}
203203
found = import_author(searched_author)
204204
assert found.key == author["key"]

scripts/providers/import_wikisource.py

Lines changed: 6 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -221,8 +221,8 @@ def format_contributor(raw_name: str) -> str:
221221
@dataclass
222222
class Author:
223223
friendly_name: str | None = None
224-
ol_id: str | None = None
225-
identifiers: dict[str, str] = field(default_factory=dict[str, str])
224+
key: str | None = None
225+
remote_ids: dict[str, str] = field(default_factory=dict[str, str])
226226
birth_date: str | None = None
227227
death_date: str | None = None
228228

@@ -307,14 +307,8 @@ def to_dict(self):
307307
"name": author.friendly_name,
308308
**({"birth_date": author.birth_date} if author.birth_date else {}),
309309
**({"death_date": author.death_date} if author.death_date else {}),
310-
**(
311-
{
312-
"identifiers": author.identifiers,
313-
}
314-
if author.identifiers
315-
else {}
316-
),
317-
**({"ol_id": author.ol_id} if author.ol_id else {}),
310+
**({"remote_ids": author.remote_ids} if author.remote_ids else {}),
311+
**({"key": author.key} if author.key else {}),
318312
}
319313
for author in self.authors
320314
]
@@ -829,7 +823,7 @@ def fix_contributor_data(
829823
contributor.death_date = extract_year(obj["deathDate"]["value"])
830824

831825
if "olId" in obj and "value" in obj["olId"]:
832-
contributor.ol_id = obj["olId"]["value"]
826+
contributor.key = f"/authors/{obj["olId"]["value"]}"
833827

834828
# Couldn't find inventaire
835829
for id in [
@@ -852,7 +846,7 @@ def fix_contributor_data(
852846
val = obj[id]["value"]
853847
if id == "youtube" and val[0] != "@":
854848
val = f'@{val}'
855-
contributor.identifiers[id] = val
849+
contributor.remote_ids[id] = val
856850

857851
if contributor_id in map:
858852
book_ids = map[contributor_id]

0 commit comments

Comments
 (0)