Skip to content

Commit b7f89e7

Browse files
cursoragent and deusyu committed
fix: retranslate when glossary aliases change
Co-authored-by: Rainman <deusyu@users.noreply.github.com>
1 parent 1bc4407 commit b7f89e7

3 files changed

Lines changed: 32 additions & 3 deletions

File tree

scripts/glossary.py

Lines changed: 8 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -74,9 +74,14 @@ def glossary_hash(glossary):
7474

7575

7676
def term_hash(term):
77-
"""SHA-256 of a single term's identifying fields."""
78-
payload = f"{term.get('source', '')}{term.get('target', '')}|{term.get('category', '')}"
79-
return hashlib.sha256(payload.encode('utf-8')).hexdigest()
77+
"""SHA-256 of fields that affect how a term is injected into prompts."""
78+
payload = {
79+
'source': term.get('source', ''),
80+
'target': term.get('target', ''),
81+
'category': term.get('category', ''),
82+
'aliases': sorted(term.get('aliases', []) or []),
83+
}
84+
return hashlib.sha256(_canonical_json(payload).encode('utf-8')).hexdigest()
8085

8186

8287
def _v2_term_defaults(term):

tests/test_glossary.py

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -625,6 +625,11 @@ def test_term_hash_changes_when_category_changes(self):
625625
t2 = {'source': 'Apple', 'target': '苹果', 'category': 'company'}
626626
self.assertNotEqual(glossary.term_hash(t1), glossary.term_hash(t2))
627627

628+
def test_term_hash_changes_when_aliases_change(self):
629+
t1 = {'source': 'Tai', 'target': '太一', 'category': 'person', 'aliases': []}
630+
t2 = {'source': 'Tai', 'target': '太一', 'category': 'person', 'aliases': ['Taichi']}
631+
self.assertNotEqual(glossary.term_hash(t1), glossary.term_hash(t2))
632+
628633

629634
class FormatTermsForPromptTests(unittest.TestCase):
630635
def test_empty_terms_returns_empty_string(self):

tests/test_run_state.py

Lines changed: 19 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -131,6 +131,25 @@ def test_new_alias_that_hits_chunk_changes_term_selection(self):
131131

132132
self.assertEqual(plan["translation_chunk_ids"], ["chunk0001"])
133133

134+
def test_new_alias_retranslates_chunk_when_source_already_selected_term(self):
135+
tmp = tempfile.TemporaryDirectory()
136+
with tmp:
137+
temp_dir = Path(tmp.name)
138+
self._write(temp_dir / "input.md", "Tai and Taichi appear.\n")
139+
self._write(temp_dir / "chunk0001.md", "Tai and Taichi appear.\n")
140+
self._write(temp_dir / "output_chunk0001.md", "太一和 Taichi 出现。\n")
141+
create_manifest(str(temp_dir), ["chunk0001.md"], str(temp_dir / "input.md"))
142+
self._write(temp_dir / "glossary.json", json.dumps(glossary_doc(), ensure_ascii=False))
143+
run_state.record_chunks(str(temp_dir), ["chunk0001"])
144+
145+
self._write(
146+
temp_dir / "glossary.json",
147+
json.dumps(glossary_doc(aliases=["Taichi"]), ensure_ascii=False),
148+
)
149+
plan = run_state.plan(str(temp_dir))
150+
151+
self.assertEqual(plan["translation_chunk_ids"], ["chunk0001"])
152+
134153
def test_output_hash_change_is_record_only(self):
135154
tmp, temp_dir = self._workspace()
136155
with tmp:

0 commit comments

Comments (0)