Skip to content

Commit 7ac77b5

Browse files
authored
fix: Implement dictionary merging in merge_property to correctly combine remote_ids during author merges. (#11747)
1 parent 2f9b0ce commit 7ac77b5

File tree

2 files changed

+40
-0
lines changed

2 files changed

+40
-0
lines changed

openlibrary/plugins/upstream/merge_authors.py

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -115,6 +115,12 @@ def merge_docs(self, master, dup):
115115
def merge_property(self, a, b):
116116
if isinstance(a, list) and isinstance(b, list):
117117
return uniq(a + b, key=dicthash)
118+
elif isinstance(a, dict) and isinstance(b, dict):
119+
# Merge dictionaries (e.g., remote_ids)
120+
# Master (a) values take precedence over duplicate (b) values
121+
result = b.copy()
122+
result.update(a)
123+
return result
118124
elif not a:
119125
return b
120126
else:

openlibrary/plugins/upstream/tests/test_merge_authors.py

Lines changed: 34 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -126,6 +126,16 @@ def test_merge_property(self):
126126
assert engine.merge_property("foo", "bar") == "foo"
127127
assert engine.merge_property(["foo"], ["bar"]) == ["foo", "bar"]
128128
assert engine.merge_property(None, ["bar"]) == ["bar"]
129+
# Test dict merging (for remote_ids)
130+
assert engine.merge_property({}, {"wikidata": "Q123"}) == {"wikidata": "Q123"}
131+
assert engine.merge_property({"viaf": "123"}, {"wikidata": "Q456"}) == {
132+
"viaf": "123",
133+
"wikidata": "Q456",
134+
}
135+
# When both have the same key, master's value should take precedence
136+
assert engine.merge_property({"wikidata": "Q111"}, {"wikidata": "Q222"}) == {
137+
"wikidata": "Q111"
138+
}
129139

130140

131141
def test_get_many():
@@ -297,6 +307,30 @@ def test_work_authors(self):
297307
"authors": [{"type": "/type/author_role", "author": {"key": "/authors/a"}}],
298308
}
299309

310+
def test_remote_ids_merge_wikidata_from_duplicate(self):
311+
"""Test that Wikidata ID from duplicate author is merged into master.
312+
313+
This reproduces the bug where remote_ids.wikidata was lost during merge.
314+
See: https://github.com/internetarchive/openlibrary/issues/11698
315+
"""
316+
# Master author has no remote_ids (or empty dict)
317+
a = dict(TEST_AUTHORS.a)
318+
319+
# Duplicate author has a Wikidata ID
320+
b = dict(TEST_AUTHORS.b, remote_ids={"wikidata": "Q12345"})
321+
322+
c = dict(TEST_AUTHORS.c, remote_ids={"wikidata": "Q12346", "viaf": "123456"})
323+
324+
web.ctx.site.add([a, b, c])
325+
self.engine.merge("/authors/a", ["/authors/b", "/authors/c"])
326+
327+
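# remote_ids from both duplicates should be merged into the master; for the conflicting "wikidata" key the first duplicate's value (Q12345) is expected, not Q12346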
328+
master_remote_ids = dict(web.ctx.site.get("/authors/a").get('remote_ids'))
329+
assert master_remote_ids == {
330+
"wikidata": "Q12345",
331+
"viaf": "123456",
332+
}, "remote_ids from duplicate should be merged into master"
333+
300334

301335
def test_dicthash():
302336
assert dicthash({}) == dicthash({})

0 commit comments

Comments
 (0)