Skip to content

Commit c5a3871

Browse files
committed
feat(machinery): more robust response handling for llms
Gracefully handle cases where metadata leaks into the response.
1 parent 1998400 commit c5a3871

5 files changed

Lines changed: 82 additions & 10 deletions

File tree

weblate/machinery/anthropic.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,8 +50,9 @@ def check_failure(self, response) -> None:
5050
def fetch_llm_translations(
5151
self, prompt: str, content: str, previous_content: str, previous_response: str
5252
) -> str | None:
53+
model = self.get_traced_model()
5354
payload = {
54-
"model": self.get_model(),
55+
"model": model,
5556
"max_tokens": self.settings.get("max_tokens", 4096),
5657
"system": prompt,
5758
"messages": [

weblate/machinery/llm.py

Lines changed: 18 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -121,17 +121,17 @@
121121
7. Do not transliterate or explain translations.
122122
8. Output must be entirely in the target_language except preserved placeholders.
123123
9. Output must be valid JSON.
124-
10. Output must be a single JSON array of strings.
124+
10. Output must be a single JSON array containing only JSON strings.
125125
11. Do not include markdown code fences or any additional text.
126-
12. The number of output elements must exactly match the number of input strings. Do not emit empty extra strings.
126+
12. The number of output elements must exactly match the number of input strings. Do not emit empty extra strings, objects, diagnostics, explanations, or metadata.
127127
13. Ensure all output strings are properly JSON-escaped.
128128
14. Internally verify placeholder integrity and JSON validity before responding.
129129
15. Placeholder contract: Tokens like @@PH44@@ are opaque atoms. Never translate, inflect, split, rename, reorder characters inside, wrap, or escape them. Never convert them to another syntax.
130130
16. Markup contract: Preserve markup, tags, attributes, entities, and similar control sequences exactly. Translate only human-readable text outside markup and outside placeholder tokens.
131131
17. Output contract: Return exactly one JSON array of strings, with no characters before `[` or after `]`.
132-
18. Treat context, key, explanation, secondary, plural, failing_checks, and placeholders fields as reference material only. Do not translate them directly and do not add their contents unless they are present in source.
132+
18. Treat context, key, explanation, secondary, plural, failing_checks, and placeholders fields as reference material only. Do not translate them directly and do not add, copy, or emit their contents unless they are present in source.
133133
19. Placeholder mappings explain what opaque placeholder tokens represent. This information may guide wording, but the output must still contain the exact placeholder tokens, not the mapped content.
134-
20. Failing checks describe issues to avoid or fix when improving an existing translation.
134+
20. Failing checks describe issues to avoid or fix when improving an existing translation. They are context only; do not include their check_id, name, description, or generated diagnostics in output.
135135
21. Target-language project instructions, when present above, contain additional requirements for the target language. Follow them unless they conflict with preserving the source meaning, placeholders, markup, or output contract.
136136
22. For translatable markup placeholders that wrap text, translate the whole text between the placeholders. Example: @@PH1@@Reset and reapply@@PH2@@ can become @@PH1@@Zurucksetzen und erneut anwenden@@PH2@@, never @@PH1@@Zurucksetzen und @@PH2@@erneut anwenden@@PH2@@.
137137
@@ -141,7 +141,7 @@
141141
Invalid placeholder handling:
142142
["Click <a href=\"/x\">log out</a> and use \\@\\@PH195\\@\\@."]
143143
144-
Respond ONLY with a valid JSON array of strings, one per input string, in the same order:
144+
Respond ONLY with a valid JSON array of strings, one per input string, in the same order. Do not include JSON objects or any values other than strings:
145145
146146
["translation 1", "translation 2", ...]
147147
"""
@@ -248,6 +248,14 @@ def fetch_llm_translations(
248248
) -> str | None:
249249
raise NotImplementedError
250250

251+
def get_model(self) -> str:
252+
raise NotImplementedError
253+
254+
def get_traced_model(self) -> str:
255+
model = self.get_model()
256+
add_breadcrumb(self.name, "model", model=model)
257+
return model
258+
251259
@staticmethod
252260
def _normalize_context_text(text: str | None) -> str:
253261
if text is None:
@@ -1515,8 +1523,11 @@ def _normalize_translations(
15151523
if (
15161524
isinstance(translations, list)
15171525
and len(translations) > expected_length
1518-
and all(isinstance(item, str) for item in translations)
1519-
and not any(translations[expected_length:])
1526+
and all(isinstance(item, str) for item in translations[:expected_length])
1527+
and not any(
1528+
isinstance(item, str) and item
1529+
for item in translations[expected_length:]
1530+
)
15201531
):
15211532
return translations[:expected_length]
15221533
return translations

weblate/machinery/ollama.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,9 @@ def get_model(self) -> str:
3232
def fetch_llm_translations(
3333
self, prompt: str, content: str, previous_content: str, previous_response: str
3434
) -> str | None:
35+
model = self.get_traced_model()
3536
payload = {
36-
"model": self.get_model(),
37+
"model": model,
3738
"messages": [
3839
{"role": "system", "content": prompt},
3940
{"role": "user", "content": previous_content},

weblate/machinery/openai.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,11 +44,12 @@ def fetch_llm_translations(
4444
{"role": "user", "content": content},
4545
]
4646
self.validate_runtime_url(self.get_runtime_base_url())
47+
model = self.get_traced_model()
4748
response = self.request(
4849
"post",
4950
self.get_chat_completions_url(),
5051
json={
51-
"model": self.get_model(),
52+
"model": model,
5253
"messages": messages,
5354
},
5455
)

weblate/machinery/tests.py

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@
6969
from weblate.machinery.llm import (
7070
LLM_CURATED_PREVIOUS_EXAMPLE_SOURCES,
7171
LLM_NEUTRAL_PREVIOUS_EXAMPLE_SOURCES,
72+
PROMPT,
7273
)
7374
from weblate.machinery.microsoft import MicrosoftCognitiveTranslation
7475
from weblate.machinery.modernmt import ModernMTTranslation
@@ -3220,6 +3221,7 @@ class OpenAITranslationTest(BaseMachineTranslationTest):
32203221
"persona": "",
32213222
"style": "",
32223223
}
3224+
TRACE_MODEL: ClassVar[str] = "gpt-5-nano"
32233225

32243226
def mock_empty(self) -> NoReturn:
32253227
self.skipTest("Not tested")
@@ -3274,6 +3276,39 @@ def mock_response(self, content: str = '["Ahoj světe"]') -> None:
32743276
},
32753277
)
32763278

3279+
def test_prompt_forbids_metadata_output(self) -> None:
3280+
self.assertIn("only JSON strings", PROMPT)
3281+
self.assertIn(
3282+
"Do not emit empty extra strings, objects, diagnostics, explanations, "
3283+
"or metadata.",
3284+
PROMPT,
3285+
)
3286+
self.assertIn(
3287+
"do not include their check_id, name, description, or generated "
3288+
"diagnostics in output",
3289+
PROMPT,
3290+
)
3291+
self.assertIn(
3292+
"Do not include JSON objects or any values other than strings",
3293+
PROMPT,
3294+
)
3295+
3296+
@responses.activate
3297+
def test_translate_traces_resolved_model_breadcrumb(self) -> None:
3298+
self.mock_response()
3299+
machine = self.get_machine()
3300+
3301+
with patch("weblate.machinery.llm.add_breadcrumb") as mock_add_breadcrumb:
3302+
machine.download_multiple_translations("en", "fr", [("Hello", None)])
3303+
3304+
model_call = next(
3305+
call
3306+
for call in mock_add_breadcrumb.call_args_list
3307+
if call.args[:2] == (machine.name, "model")
3308+
)
3309+
self.assertEqual(model_call.kwargs["model"], self.TRACE_MODEL)
3310+
self.assertNotIn("key", model_call.kwargs)
3311+
32773312
def test_translate_sends_unit_context(self) -> None:
32783313
machine = self.get_machine()
32793314
unit = make_unit(
@@ -4830,6 +4865,28 @@ def test_translate_rejects_non_empty_extra_reply(self) -> None:
48304865
[("One", None)],
48314866
)
48324867

4868+
@responses.activate
4869+
def test_translate_ignores_trailing_metadata_reply(self) -> None:
4870+
self.mock_response(
4871+
json.dumps(
4872+
[
4873+
"Premier",
4874+
{
4875+
"description": "The following markup is missing.",
4876+
"name": "Inconsistent markup",
4877+
},
4878+
]
4879+
)
4880+
)
4881+
4882+
translation = self.get_machine().download_multiple_translations(
4883+
"en",
4884+
"fr",
4885+
[("One", None)],
4886+
)
4887+
4888+
self.assertEqual(translation["One"][0]["text"], "Premier")
4889+
48334890
def test_translate_rejects_ambiguous_rst_duplicate_placeholders(self) -> None:
48344891
machine = self.get_machine()
48354892

@@ -5252,6 +5309,7 @@ class AzureOpenAITranslationTest(OpenAITranslationTest):
52525309
"style": "",
52535310
"azure_endpoint": "https://my-instance.openai.azure.com",
52545311
}
5312+
TRACE_MODEL: ClassVar[str] = "my-deployment"
52555313

52565314
def mock_response(self, content: str = '["Ahoj světe"]') -> None:
52575315
responses.add(

0 commit comments

Comments
 (0)