Skip to content

Commit 43f9e7e

Browse files
If import is from wikisource, don't match editions unless there's already a wikisource ID (#10692)
1 parent de903b9 commit 43f9e7e

File tree

2 files changed

+71
-0
lines changed

2 files changed

+71
-0
lines changed

openlibrary/catalog/add_book/__init__.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -422,6 +422,18 @@ def isbns_from_record(rec: dict) -> list[str]:
422422
return isbns
423423

424424

425+
def find_wikisource_src(rec: dict) -> str | None:
426+
if not rec.get('source_records'):
427+
return None
428+
ws_prefix = 'wikisource:'
429+
ws_match = next(
430+
(src for src in rec['source_records'] if src.startswith(ws_prefix)), None
431+
)
432+
if ws_match:
433+
return ws_match[len(ws_prefix) :]
434+
return None
435+
436+
425437
def build_pool(rec: dict) -> dict[str, list[str]]:
426438
"""
427439
Searches for existing edition matches on title and bibliographic keys.
@@ -433,6 +445,13 @@ def build_pool(rec: dict) -> dict[str, list[str]]:
433445
pool = defaultdict(set)
434446
match_fields = ('title', 'oclc_numbers', 'lccn', 'ocaid')
435447

448+
if ws_match := find_wikisource_src(rec):
449+
# If this is a wikisource import, ONLY consider an edition a match if it has the same wikisource ID
450+
ekeys = set(editions_matched(rec, 'identifiers.wikisource', ws_match))
451+
if ekeys:
452+
pool['wikisource'] = ekeys
453+
return {k: list(v) for k, v in pool.items() if v}
454+
436455
# Find records with matching fields
437456
for field in match_fields:
438457
pool[field] = set(editions_matched(rec, field))
@@ -458,6 +477,14 @@ def find_quick_match(rec: dict) -> str | None:
458477
if 'openlibrary' in rec:
459478
return '/books/' + rec['openlibrary']
460479

480+
if ws_match := find_wikisource_src(rec):
481+
# If this is a wikisource import, ONLY consider an edition a match if it has the same wikisource ID
482+
ekeys = editions_matched(rec, 'identifiers.wikisource', ws_match)
483+
if ekeys:
484+
return ekeys[0]
485+
else:
486+
return None
487+
461488
ekeys = editions_matched(rec, 'ocaid')
462489
if ekeys:
463490
return ekeys[0]
@@ -498,7 +525,9 @@ def editions_matched(rec: dict, key: str, value=None) -> list[str]:
498525

499526
if value is None:
500527
value = rec[key]
528+
501529
q = {'type': '/type/edition', key: value}
530+
502531
ekeys = list(web.ctx.site.things(q))
503532
return ekeys
504533

openlibrary/catalog/add_book/tests/test_add_book.py

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,48 @@ def test_editions_matched(mock_site, add_languages, ia_writeback):
130130
assert result == ['/books/OL1M']
131131

132132

133+
def test_force_new_wikisource_edition(mock_site, add_languages, ia_writeback):
    """A wikisource import gets an empty pool when the only similar edition is not from wikisource."""
    # Load an edition whose source record is not a wikisource one.
    load(
        {
            'title': 'test an import that is not from wikisource',
            'isbn_10': ['0190906767'],
            'source_records': ['test:002'],
        }
    )

    # Same title and ISBN as the edition loaded above, but this record
    # comes from wikisource and carries a wikisource identifier.
    incoming = {
        'title': 'test an import that is not from wikisource',
        'isbn_10': ['0190906767'],
        'source_records': ['wikisource:en:wikisourceidentifier'],
        'identifiers': {'wikisource': ['en:wikisourceidentifier']},
    }

    # The existing edition has no wikisource identifier, so nothing is pooled.
    assert build_pool(incoming) == {}
152+
153+
154+
def test_match_wikisource_edition(mock_site, add_languages, ia_writeback):
    """An incoming wikisource record finds the edition that already has its wikisource identifier."""
    # Load an edition that already carries a wikisource identifier, even
    # though its own source record is not a wikisource one.
    existing = {
        'title': 'test an import that already has a wikisource identifier',
        'isbn_10': ['0190906768'],
        'source_records': ['test:003'],
        'identifiers': {'wikisource': ['en:wikisourceidentifier2']},
    }
    load(existing)

    # Incoming wikisource record whose source-record ID equals the
    # identifier stored on the edition above.
    incoming = {
        'title': 'test a wikisource record against editions that have wikisource identifiers',
        'isbn_10': ['0190906768'],
        'source_records': ['wikisource:en:wikisourceidentifier2'],
    }

    # NOTE(review): this lookup goes through editions_matched on 'isbn_10',
    # so it checks the ISBN query rather than the wikisource-ID path —
    # confirm that is the intended coverage.
    matched = editions_matched(incoming, 'isbn_10', '0190906768')
    assert matched == ['/books/OL1M']
173+
174+
133175
def test_load_without_required_field():
134176
rec = {'ocaid': 'test item'}
135177
pytest.raises(RequiredField, load, {'ocaid': 'test_item'})

0 commit comments

Comments
 (0)