Skip to content

Commit d446a6c

Browse files
author
nesquena-hermes
committed
Merge PR nesquena#3049
# Conflicts: # CHANGELOG.md
2 parents 0fd12b2 + 2aeebf5 commit d446a6c

3 files changed

Lines changed: 29 additions & 3 deletions

File tree

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,10 @@
88
- Local fallback title generation no longer has a German-only `Session Bilder` special case; it now uses the same generic topic extraction path as other fallback titles. (Refs #3040)
99
- Title-generation prompts now use the same language-neutral "match the user language" instruction for every locale instead of adding German-only exemplars. (Refs #3040)
1010

11+
### Fixed
12+
13+
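- Title-language detection no longer treats common English tech/jargon text such as "session die" or DAS/DER references as German just because of shared tokens: the ambiguous markers `session`, `die`, `der`, `das`, and `den` were removed, and at least three German marker words (or an umlaut/ß) are now required. (Refs #3040)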
14+
1115
## [v0.51.152] — 2026-05-28 — Release DX (stage-batch34 — single-PR optional gateway-backed browser chat)
1216

1317
### Added

api/streaming.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1385,12 +1385,12 @@ def _detect_title_language(text: str) -> str:
13851385
return ''
13861386
german_markers = {
13871387
'warum', 'werden', 'wird', 'wurde', 'hier', 'nicht', 'mehr', 'alte', 'alten',
1388-
'bilder', 'angezeigt', 'session', 'prüfe', 'ich', 'die', 'der', 'das', 'den',
1389-
'und', 'oder', 'mit', 'für', 'von', 'zu', 'ist', 'sind', 'bitte', 'kannst',
1388+
'bilder', 'angezeigt', 'prüfe', 'ich', 'und', 'oder', 'mit', 'für', 'von',
1389+
'zu', 'ist', 'sind', 'bitte', 'kannst',
13901390
}
13911391
tokens = re.findall(r'[A-Za-zÀ-ÖØ-öø-ÿ]+', s)
13921392
german_hits = sum(1 for tok in tokens if tok in german_markers)
1393-
if re.search(r'[äöüß]', s) or german_hits >= 2:
1393+
if re.search(r'[äöüß]', s) or german_hits >= 3:
13941394
return 'de'
13951395
return ''
13961396

tests/test_title_aux_routing.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -245,6 +245,28 @@ def test_title_prompt_language_rule_is_same_for_supported_locales(self):
245245
with self.subTest(text=text):
246246
self.assertEqual(_title_prompt_language_rule(text), expected)
247247

248+
def test_title_language_detection_avoids_english_tech_false_positives(self):
249+
"""English tech/jargon text must not be classified as German by shared tokens."""
250+
from api.streaming import _detect_title_language
251+
252+
examples = [
253+
'Why did the session die after the DAS storage failover?',
254+
'The session can die when DAS storage disconnects.',
255+
'Debug the session and DER certificate import failure.',
256+
]
257+
for text in examples:
258+
with self.subTest(text=text):
259+
self.assertEqual(_detect_title_language(text), '')
260+
261+
def test_title_language_detection_keeps_german_without_umlaut(self):
262+
"""German without umlauts still needs a language hint when evidence is specific."""
263+
from api.streaming import _detect_title_language
264+
265+
self.assertEqual(
266+
_detect_title_language('Warum werden hier die Bilder der alten Session nicht angezeigt?'),
267+
'de',
268+
)
269+
248270
def test_german_source_rejects_english_aux_title(self):
249271
"""Regression: an English aux title must not overwrite a German conversation."""
250272
from api.streaming import _generate_llm_session_title_via_aux

0 commit comments

Comments
 (0)