Skip to content

Commit 87ef36f

Browse files
committed
Add Translation.value and .properties, with data model contents
1 parent f6e2165 commit 87ef36f

22 files changed

+263
-65
lines changed
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
# Generated by Django 4.2.22 on 2025-10-06 20:59
2+
3+
from django.db import migrations, models
4+
5+
6+
class Migration(migrations.Migration):
    """
    Schema migration: add the `value` and `properties` JSON columns to
    the `translation` model of the `base` app.
    """

    dependencies = [("base", "0100_android_as_mf2")]

    operations = [
        # `value` is always present; new rows start out with an empty list.
        migrations.AddField(
            model_name="translation",
            name="value",
            field=models.JSONField(default=list),
        ),
        # `properties` is optional and may be left NULL.
        migrations.AddField(
            model_name="translation",
            name="properties",
            field=models.JSONField(blank=True, null=True),
        ),
    ]
Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,129 @@
1+
from html import escape
2+
from json import loads
3+
from math import ceil
4+
from re import compile
5+
6+
from moz.l10n.formats import Format
7+
from moz.l10n.formats.fluent import fluent_parse_entry
8+
from moz.l10n.message import message_to_json, parse_message
9+
from moz.l10n.model import CatchallKey, Message, PatternMessage, SelectMessage
10+
11+
from django.db import migrations, models
12+
13+
14+
# Matches a newline together with any whitespace surrounding it.
# NOTE(review): not referenced by the code visible in this migration.
android_nl = compile(r"\s*\n\s*")
15+
# Matches a backslash that is not itself preceded by a backslash, followed by
# `n` or `t` (captured as group 1) and any trailing whitespace.
# NOTE(review): not referenced by the code visible in this migration.
android_esc = compile(r"(?<!\\)\\([nt])\s*")
16+
17+
# Number of translations accumulated before each bulk_update() call, and the
# divisor used to estimate the total number of batches for progress output.
batch_size = 10000
18+
19+
20+
def parse_pontoon_message(trans) -> Message:
    """
    Parse `trans.string` into a message, based on `trans.format`.

    - "webext": parsed as WebExtension syntax; if that raises ValueError,
      parsing is retried with the placeholders stored in the entity's meta.
    - "lang", "properties" and "": the string is wrapped as-is in a
      PatternMessage.
    - "android" and "gettext": parsed as MF2.
    - "xcode" and "xliff": the string is HTML-escaped and parsed as XLIFF,
      with `xliff_is_xcode` set only for "xcode".
    - anything else: the format is looked up by name in `Format`.

    For a resulting SelectMessage, a catchall in the first key position of
    any variant gets its value set to "other".
    """
    source = trans.string
    kind = trans.format

    if kind == "webext":
        try:
            return parse_message(Format.webext, source)
        except ValueError:
            entity_meta = trans.entity.meta  # Only queried if required
            placeholders = next(
                (loads(value) for key, value in entity_meta if key == "placeholders"),
                None,
            )
            return parse_message(
                Format.webext, source, webext_placeholders=placeholders
            )

    if kind in ("lang", "properties", ""):
        return PatternMessage([source])

    xliff_is_xcode = False
    if kind in ("android", "gettext"):
        fmt = Format.mf2
    elif kind in ("xcode", "xliff"):
        fmt = Format.xliff
        source = escape(source)
        xliff_is_xcode = kind == "xcode"
    else:
        fmt = Format[kind]

    msg = parse_message(fmt, source, xliff_is_xcode=xliff_is_xcode)
    if isinstance(msg, SelectMessage):
        for variant_keys in msg.variants:
            first_key = variant_keys[0]
            if isinstance(first_key, CatchallKey):
                # MF2 syntax does not retain the catchall name/label
                first_key.value = "other"
    return msg
51+
52+
53+
def set_value_and_properties(apps, schema_editor):
    """
    Fill in `Translation.value` (and `Translation.properties` for Fluent
    entries that have any) from each translation's `string`.

    Translations are streamed with `.iterator()` and written back with
    `bulk_update()` in batches of `batch_size`. If parsing raises, the value
    falls back to `[trans.string]`; a message is printed only when the
    translation is approved, its entity is not obsolete, and its project is
    not disabled. One dot is printed per written batch, with a percentage on
    every tenth batch.
    """
    Resource = apps.get_model("base", "Resource")
    Translation = apps.get_model("base", "Translation")

    batch_total = ceil(Translation.objects.count() / batch_size)
    batch_count = 0

    def print_progress():
        nonlocal batch_count
        percentage = (
            f"({(batch_count / batch_total):.1%})" if batch_count % 10 == 0 else ""
        )
        print(f".{percentage}", end="", flush=True)
        batch_count += 1

    def flush(pending, fields):
        # Write one batch, empty the list for reuse, and report progress.
        Translation.objects.bulk_update(pending, fields)
        pending.clear()
        print_progress()

    value_and_props_fields = ["value", "properties"]
    value_only_fields = ["value"]

    # Translations needing both columns written vs. only `value`.
    with_props = []
    value_only = []

    # Annotate each translation with the format of its entity's resource.
    resource_format = models.Subquery(
        Resource.objects.filter(id=models.OuterRef("entity__resource_id")).values(
            "format"
        )
    )
    queryset = Translation.objects.annotate(format=resource_format)

    for trans in queryset.iterator():
        target = value_only
        try:
            if trans.format == "fluent":
                entry = fluent_parse_entry(trans.string, with_linepos=False)
                trans.value = message_to_json(entry.value)
                json_props = {
                    name: message_to_json(msg)
                    for name, msg in entry.properties.items()
                }
                trans.properties = json_props or None
                if trans.properties:
                    target = with_props
            else:
                parsed = parse_pontoon_message(trans)
                trans.value = message_to_json(parsed)
        except Exception:
            is_approved_and_active = (
                trans.approved
                and not trans.entity.obsolete
                and not trans.entity.resource.project.disabled
            )
            if is_approved_and_active:
                print(
                    f"\nUsing fallback value for approved and active {trans.format} translation {trans.pk} "
                    f"for entity {trans.entity.pk}, locale {trans.locale.code}:\n{trans.string}",
                    flush=True,
                )
            # Fallback: the raw string as a single-part value.
            trans.value = [trans.string]
            target = value_only
        target.append(trans)

        if len(with_props) == batch_size:
            flush(with_props, value_and_props_fields)
        if len(value_only) == batch_size:
            flush(value_only, value_only_fields)

    # Write whatever is left over from the final, partial batches.
    if with_props:
        flush(with_props, value_and_props_fields)
    if value_only:
        flush(value_only, value_only_fields)
118+
119+
120+
class Migration(migrations.Migration):
    """
    Data migration: populate `Translation.value` and `.properties` for
    existing rows. Reversing it is a no-op.
    """

    dependencies = [("base", "0101_add_translation_value_and_properties_schema")]

    operations = [
        migrations.RunPython(
            code=set_value_and_properties,
            reverse_code=migrations.RunPython.noop,
        ),
    ]

pontoon/base/models/translation.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,8 @@ class Translation(DirtyFieldsMixin, models.Model):
105105
locale = models.ForeignKey(Locale, models.CASCADE)
106106
user = models.ForeignKey(User, models.SET_NULL, null=True, blank=True)
107107
string = models.TextField()
108+
value = models.JSONField(default=list)
109+
properties = models.JSONField(null=True, blank=True)
108110
date = models.DateTimeField(default=timezone.now)
109111

110112
# Active translations are displayed in the string list and as the first

pontoon/sync/core/translations_from_repo.py

Lines changed: 31 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77

88
from fluent.syntax import FluentParser
99
from moz.l10n.formats import l10n_extensions
10+
from moz.l10n.message import message_to_json
1011
from moz.l10n.model import Id as L10nId
1112
from moz.l10n.paths import L10nConfigPaths, L10nDiscoverPaths, parse_android_locale
1213
from moz.l10n.resource import parse_resource
@@ -32,13 +33,13 @@
3233
from pontoon.checks.utils import bulk_run_checks
3334
from pontoon.sync.core.checkout import Checkout, Checkouts
3435
from pontoon.sync.core.paths import UploadPaths
35-
from pontoon.sync.formats import as_vcs_translations
36+
from pontoon.sync.formats import RepoTranslation, as_repo_translations
3637

3738

3839
log = logging.getLogger(__name__)
3940

40-
Updates = dict[tuple[int, int], tuple[str | None, bool]]
41-
""" (entity.id, locale.id) -> (string, fuzzy) """
41+
Updates = dict[tuple[int, int], RepoTranslation | None]
42+
""" (entity.id, locale.id) -> RepoTranslation """
4243

4344

4445
def sync_translations_from_repo(
@@ -129,7 +130,7 @@ def find_db_updates(
129130
db_changes: Iterable[ChangedEntityLocale],
130131
) -> Updates | None:
131132
"""
132-
`(entity.id, locale.id) -> (string|None, fuzzy)`
133+
`(entity.id, locale.id) -> RepoTranslation`
133134
134135
Translations in changed resources, excluding:
135136
- Exact matches with previous approved or pretranslated translations
@@ -140,8 +141,8 @@ def find_db_updates(
140141
resource_paths: set[str] = set()
141142
# db_path -> {locale.id}
142143
translated_resources: dict[str, set[int]] = defaultdict(set)
143-
# (db_path, tx.key, locale.id) -> (string|None, fuzzy)
144-
translations: dict[tuple[str, L10nId, int], tuple[str | None, bool]] = {}
144+
# (db_path, tx.key, locale.id) -> RepoTranslation|None
145+
translations: dict[tuple[str, L10nId, int], RepoTranslation | None] = {}
145146
for target_path in changed_target_paths:
146147
ref = paths.find_reference(target_path)
147148
if ref:
@@ -161,8 +162,8 @@ def find_db_updates(
161162
resource_paths.add(db_path)
162163
translated_resources[db_path].add(locale.pk)
163164
translations.update(
164-
((db_path, tx.key, locale.pk), (tx.string, tx.fuzzy))
165-
for tx in as_vcs_translations(l10n_res)
165+
((db_path, rt.key, locale.pk), rt)
166+
for rt in as_repo_translations(l10n_res)
166167
)
167168
except Exception as error:
168169
scope = f"[{project.slug}:{db_path}, {locale.code}]"
@@ -215,18 +216,18 @@ def find_db_updates(
215216
trans_values["locale_id"],
216217
)
217218
if key in translations:
218-
string, _ = translations[key]
219-
if translations_equal(
219+
rt = translations[key]
220+
if rt is not None and translations_equal(
220221
project,
221222
key[0],
222223
trans_values["entity__resource__format"],
223-
string,
224+
rt.string,
224225
trans_values["string"],
225226
):
226227
del translations[key]
227228
else:
228229
# The translation has been removed from the repo
229-
translations[key] = (None, False)
230+
translations[key] = None
230231
if paginator.num_pages > 3:
231232
log.debug(
232233
f"[{project.slug}] Filtering matches from translations... {page_number}/{paginator.num_pages}"
@@ -258,10 +259,10 @@ def find_db_updates(
258259
.iterator()
259260
}
260261
updates: Updates = {}
261-
for (db_path, ent_key, locale_id), tx in translations.items():
262+
for (db_path, ent_key, locale_id), rt in translations.items():
262263
entity_id = entities.get((db_path, ent_key), None)
263264
if entity_id is not None:
264-
updates[(entity_id, locale_id)] = tx
265+
updates[(entity_id, locale_id)] = rt
265266
log.debug(f"[{project.slug}] Compiling updates... Found {len(updates)}")
266267
return updates
267268

@@ -302,14 +303,14 @@ def update_db_translations(
302303
# Approve matching suggestions
303304
matching_suggestions_q = Q()
304305
repo_rm_count = 0
305-
for (entity_id, locale_id), (string, _) in repo_translations.items():
306-
if string is None:
306+
for (entity_id, locale_id), rt in repo_translations.items():
307+
if rt is None:
307308
# The translation has been removed from the repo
308309
translations_to_reject |= Q(entity_id=entity_id, locale_id=locale_id)
309310
repo_rm_count += 1
310311
else:
311312
matching_suggestions_q |= Q(
312-
entity_id=entity_id, locale_id=locale_id, string=string
313+
entity_id=entity_id, locale_id=locale_id, string=rt.string
313314
)
314315
# (entity_id, locale_id) => translation
315316
suggestions: dict[tuple[int, int], Translation] = (
@@ -325,7 +326,8 @@ def update_db_translations(
325326
update_fields: set[str] = set()
326327
approve_count = 0
327328
for tx in suggestions.values():
328-
_, fuzzy = repo_translations[(tx.entity_id, tx.locale_id)]
329+
rt = repo_translations[(tx.entity_id, tx.locale_id)]
330+
fuzzy = rt.fuzzy if rt is not None else False
329331
if fuzzy and tx.fuzzy:
330332
# Keep fuzzy suggestions unchanged
331333
continue
@@ -371,17 +373,24 @@ def update_db_translations(
371373
new_translations: list[Translation] = []
372374
if repo_translations:
373375
# Add new approved translations for the remainder
374-
for (entity_id, locale_id), (string, fuzzy) in repo_translations.items():
375-
if string is not None:
376+
for (entity_id, locale_id), rt in repo_translations.items():
377+
if rt is not None:
378+
json_properties = (
379+
{key: message_to_json(msg) for key, msg in rt.properties.items()}
380+
if rt.properties
381+
else None
382+
)
376383
tx = Translation(
377384
entity_id=entity_id,
378385
locale_id=locale_id,
379-
string=string,
386+
string=rt.string,
387+
value=message_to_json(rt.value),
388+
properties=json_properties,
380389
date=now,
381390
active=True,
382391
user=user,
383392
)
384-
if fuzzy:
393+
if rt.fuzzy:
385394
tx.fuzzy = True
386395
else:
387396
tx.approved = True

pontoon/sync/formats/__init__.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616

1717
from pontoon.base.models import Entity
1818

19-
from .common import VCSTranslation
19+
from .common import RepoTranslation
2020
from .gettext import gettext_as_entity, gettext_as_translation
2121
from .xliff import xliff_as_entity, xliff_as_translation
2222

@@ -62,7 +62,7 @@ def _as_string(format: Format | None, entry: Entry[Message]) -> str:
6262
return serialize_message(format, entry.value)
6363

6464

65-
def as_vcs_translations(res: MozL10nResource[Message]) -> Iterator[VCSTranslation]:
65+
def as_repo_translations(res: MozL10nResource[Message]) -> Iterator[RepoTranslation]:
6666
for section in res.sections:
6767
if res.format == Format.android and section.id:
6868
continue
@@ -74,9 +74,11 @@ def as_vcs_translations(res: MozL10nResource[Message]) -> Iterator[VCSTranslatio
7474
case Format.xliff:
7575
tx = xliff_as_translation(section.id, entry)
7676
case _:
77-
tx = VCSTranslation(
77+
tx = RepoTranslation(
7878
key=section.id + entry.id,
7979
string=_as_string(res.format, entry),
80+
value=entry.value,
81+
properties=entry.properties,
8082
)
8183
if tx is not None:
8284
yield tx

pontoon/sync/formats/common.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,16 @@
11
from dataclasses import dataclass
22

3+
from moz.l10n.model import Message
4+
35

46
@dataclass
class RepoTranslation:
    """
    A single translation of a source string into another language.
    """

    # Identifier of the translated entry; callers build it from the entry id,
    # in some formats prefixed by the section id.
    key: tuple[str, ...]
    # Serialized string form of the translation.
    string: str
    # Parsed message value of the entry.
    value: Message
    # Parsed messages for the entry's properties, keyed by name, if any.
    properties: dict[str, Message] | None = None
    # True when the translation is flagged as fuzzy in the repository.
    fuzzy: bool = False

pontoon/sync/formats/gettext.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,15 +14,15 @@
1414

1515
from pontoon.base.models import Entity
1616

17-
from .common import VCSTranslation
17+
from .common import RepoTranslation
1818

1919

2020
def gettext_as_translation(entry: Entry[Message]):
    """
    Build a RepoTranslation from a gettext entry.

    Returns None when the entry's value is empty. The string is the MF2
    serialization of the value, and the result is fuzzy when the entry's
    metadata holds a "flag" item whose value is "fuzzy".
    """
    if entry.value.is_empty():
        return None

    serialized = serialize_message(Format.mf2, entry.value)

    is_fuzzy = False
    for meta in entry.meta:
        if meta.key == "flag" and meta.value == "fuzzy":
            is_fuzzy = True
            break

    return RepoTranslation(
        key=entry.id,
        string=serialized,
        value=entry.value,
        fuzzy=is_fuzzy,
    )
2626

2727

2828
def gettext_as_entity(entry: Entry[Message], kwargs: dict[str, Any]) -> Entity:

pontoon/sync/formats/xliff.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,13 +13,17 @@
1313

1414
from pontoon.base.models import Entity
1515

16-
from .common import VCSTranslation
16+
from .common import RepoTranslation
1717

1818

1919
def xliff_as_translation(section_id: L10nId, entry: Entry):
    """
    Build a RepoTranslation from an XLIFF entry.

    The string is the unescaped XLIFF serialization of the entry value;
    returns None when that string is empty.
    """
    # Here, entry.value is from the <target>
    string = unescape(serialize_message(Format.xliff, entry.value))
    if not string:
        return None
    return RepoTranslation(
        key=section_id + entry.id,
        string=string,
        value=entry.value,
    )
2327

2428

2529
def xliff_as_entity(

0 commit comments

Comments
 (0)