Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Generated by Django 4.2.22 on 2025-10-06 20:59

from django.db import migrations, models


class Migration(migrations.Migration):
    """Schema migration: add the `value` and `properties` JSON fields to `translation`."""

    # Applied after migration 0100_android_as_mf2 of the "base" app.
    dependencies = [
        ("base", "0100_android_as_mf2"),
    ]

    operations = [
        # `default=list` is a callable, so the default is a fresh empty list
        # rather than one list object shared between rows.
        migrations.AddField(
            model_name="translation",
            name="value",
            field=models.JSONField(default=list),
        ),
        # Nullable and allowed to be left blank.
        migrations.AddField(
            model_name="translation",
            name="properties",
            field=models.JSONField(blank=True, null=True),
        ),
    ]
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
from html import escape
from json import loads
from math import ceil
from re import compile

from moz.l10n.formats import Format
from moz.l10n.formats.fluent import fluent_parse_entry
from moz.l10n.message import message_to_json, parse_message
from moz.l10n.model import CatchallKey, Message, PatternMessage, SelectMessage

from django.db import migrations, models


# `compile` here is `re.compile` (imported above), not the builtin.
# android_nl matches a newline together with any whitespace around it;
# android_esc matches an unescaped `\n` or `\t` escape plus trailing whitespace.
# NOTE(review): neither pattern is referenced by the code visible in this
# migration -- confirm whether they are still needed.
android_nl = compile(r"\s*\n\s*")
android_esc = compile(r"(?<!\\)\\([nt])\s*")

# Number of translations collected in a list before it is written with
# bulk_update(); also used to estimate the total number of batches.
batch_size = 10000


def parse_pontoon_message(trans) -> Message:
    """
    Parse the stored string of a translation into a moz.l10n message.

    `trans` must expose `.string` and `.format`; `.entity.meta` is read only
    when a "webext" string cannot be parsed without its placeholders.

    - "webext": parsed directly, retried with the entity's "placeholders"
      metadata if the first attempt raises ValueError.
    - "lang", "properties" and "": the string is wrapped as-is in a
      single-part PatternMessage, without parsing.
    - "android" and "gettext": parsed as MF2 syntax.
    - "xcode" and "xliff": HTML-escaped, then parsed as XLIFF.
    - anything else: parsed with the `Format` member of the same name.

    Exceptions raised by the parser or by the `Format` lookup propagate.
    """
    text = trans.string
    kind = trans.format

    if kind == "webext":
        try:
            return parse_message(Format.webext, text)
        except ValueError:
            # Entity metadata is only queried when the plain parse fails.
            placeholders = None
            for meta_key, meta_value in trans.entity.meta:
                if meta_key == "placeholders":
                    placeholders = loads(meta_value)
                    break
            return parse_message(
                Format.webext, text, webext_placeholders=placeholders
            )

    if kind in ("lang", "properties", ""):
        return PatternMessage([text])

    from_xcode = kind == "xcode"
    if kind in ("android", "gettext"):
        parser_format = Format.mf2
    elif from_xcode or kind == "xliff":
        parser_format = Format.xliff
        text = escape(text)
    else:
        parser_format = Format[kind]

    parsed = parse_message(parser_format, text, xliff_is_xcode=from_xcode)
    if isinstance(parsed, SelectMessage):
        for variant_keys in parsed.variants:
            leading_key = variant_keys[0]
            if isinstance(leading_key, CatchallKey):
                # MF2 syntax does not retain the catchall name/label
                leading_key.value = "other"
    return parsed


def set_value_and_properties(apps, schema_editor):
    """
    Populate `value` (and, for Fluent, `properties`) on every Translation.

    Each translation is annotated with the format of its entity's resource.
    Fluent strings are parsed as entries, giving a value and optionally
    properties; all other formats go through parse_pontoon_message(). If any
    step raises, the raw string is stored as a single-part value instead, and
    a notice is printed when the translation is approved and its entity and
    project are still active.

    Rows are written with bulk_update() in lists of `batch_size`, kept
    separately for rows that need `properties` written and rows that do not.
    A progress dot is printed per written batch, with a percentage on every
    tenth one.

    `apps` is the historical app registry passed in by RunPython;
    `schema_editor` is unused.
    """
    Resource = apps.get_model("base", "Resource")
    Translation = apps.get_model("base", "Translation")

    batch_total = ceil(Translation.objects.count() / batch_size)
    batch_count = 0

    def flush(pending, fields):
        """Write one batch of translations, empty the list, report progress."""
        nonlocal batch_count
        Translation.objects.bulk_update(pending, fields)
        pending.clear()
        if batch_count % 10 == 0:
            print(f".({(batch_count / batch_total):.1%})", end="", flush=True)
        else:
            print(".", end="", flush=True)
        batch_count += 1

    with_properties = []
    value_only = []
    resource_format = models.Subquery(
        Resource.objects.filter(id=models.OuterRef("entity__resource_id")).values(
            "format"
        )
    )
    annotated = Translation.objects.annotate(format=resource_format)
    for trans in annotated.iterator():
        try:
            if trans.format != "fluent":
                trans.value = message_to_json(parse_pontoon_message(trans))
                value_only.append(trans)
            else:
                entry = fluent_parse_entry(trans.string, with_linepos=False)
                trans.value = message_to_json(entry.value)
                json_props = {
                    name: message_to_json(msg)
                    for name, msg in entry.properties.items()
                }
                # An empty mapping is stored as None.
                trans.properties = json_props or None
                target = with_properties if trans.properties else value_only
                target.append(trans)
        except Exception:
            # Deliberate best-effort: keep the raw string as the value rather
            # than aborting the migration; only report failures that matter.
            if (
                trans.approved
                and not trans.entity.obsolete
                and not trans.entity.resource.project.disabled
            ):
                print(
                    f"\nUsing fallback value for approved and active {trans.format} translation {trans.pk} "
                    f"for entity {trans.entity.pk}, locale {trans.locale.code}:\n{trans.string}",
                    flush=True,
                )
            trans.value = [trans.string]
            value_only.append(trans)

        if len(with_properties) == batch_size:
            flush(with_properties, ["value", "properties"])
        if len(value_only) == batch_size:
            flush(value_only, ["value"])

    # Write whatever is left over from the final, partial batches.
    if with_properties:
        flush(with_properties, ["value", "properties"])
    if value_only:
        flush(value_only, ["value"])


class Migration(migrations.Migration):
    """Data migration: fill in the `value` and `properties` fields of translations."""

    # Requires the schema migration that adds the two fields.
    dependencies = [
        ("base", "0101_add_translation_value_and_properties_schema"),
    ]

    operations = [
        # The reverse direction is a no-op, so the migration can be unapplied
        # without touching the data it wrote.
        migrations.RunPython(
            set_value_and_properties, reverse_code=migrations.RunPython.noop
        ),
    ]
2 changes: 2 additions & 0 deletions pontoon/base/models/translation.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,8 @@ class Translation(DirtyFieldsMixin, models.Model):
locale = models.ForeignKey(Locale, models.CASCADE)
user = models.ForeignKey(User, models.SET_NULL, null=True, blank=True)
string = models.TextField()
value = models.JSONField(default=list)
properties = models.JSONField(null=True, blank=True)
date = models.DateTimeField(default=timezone.now)

# Active translations are displayed in the string list and as the first
Expand Down
53 changes: 31 additions & 22 deletions pontoon/sync/core/translations_from_repo.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

from fluent.syntax import FluentParser
from moz.l10n.formats import l10n_extensions
from moz.l10n.message import message_to_json
from moz.l10n.model import Id as L10nId
from moz.l10n.paths import L10nConfigPaths, L10nDiscoverPaths, parse_android_locale
from moz.l10n.resource import parse_resource
Expand All @@ -32,13 +33,13 @@
from pontoon.checks.utils import bulk_run_checks
from pontoon.sync.core.checkout import Checkout, Checkouts
from pontoon.sync.core.paths import UploadPaths
from pontoon.sync.formats import as_vcs_translations
from pontoon.sync.formats import RepoTranslation, as_repo_translations


log = logging.getLogger(__name__)

Updates = dict[tuple[int, int], tuple[str | None, bool]]
""" (entity.id, locale.id) -> (string, fuzzy) """
Updates = dict[tuple[int, int], RepoTranslation | None]
""" (entity.id, locale.id) -> RepoTranslation """


def sync_translations_from_repo(
Expand Down Expand Up @@ -129,7 +130,7 @@ def find_db_updates(
db_changes: Iterable[ChangedEntityLocale],
) -> Updates | None:
"""
`(entity.id, locale.id) -> (string|None, fuzzy)`
`(entity.id, locale.id) -> RepoTranslation`

Translations in changed resources, excluding:
- Exact matches with previous approved or pretranslated translations
Expand All @@ -140,8 +141,8 @@ def find_db_updates(
resource_paths: set[str] = set()
# db_path -> {locale.id}
translated_resources: dict[str, set[int]] = defaultdict(set)
# (db_path, tx.key, locale.id) -> (string|None, fuzzy)
translations: dict[tuple[str, L10nId, int], tuple[str | None, bool]] = {}
# (db_path, tx.key, locale.id) -> RepoTranslation|None
translations: dict[tuple[str, L10nId, int], RepoTranslation | None] = {}
for target_path in changed_target_paths:
ref = paths.find_reference(target_path)
if ref:
Expand All @@ -161,8 +162,8 @@ def find_db_updates(
resource_paths.add(db_path)
translated_resources[db_path].add(locale.pk)
translations.update(
((db_path, tx.key, locale.pk), (tx.string, tx.fuzzy))
for tx in as_vcs_translations(l10n_res)
((db_path, rt.key, locale.pk), rt)
for rt in as_repo_translations(l10n_res)
)
except Exception as error:
scope = f"[{project.slug}:{db_path}, {locale.code}]"
Expand Down Expand Up @@ -215,18 +216,18 @@ def find_db_updates(
trans_values["locale_id"],
)
if key in translations:
string, _ = translations[key]
if translations_equal(
rt = translations[key]
if rt is not None and translations_equal(
project,
key[0],
trans_values["entity__resource__format"],
string,
rt.string,
trans_values["string"],
):
del translations[key]
else:
# The translation has been removed from the repo
translations[key] = (None, False)
translations[key] = None
if paginator.num_pages > 3:
log.debug(
f"[{project.slug}] Filtering matches from translations... {page_number}/{paginator.num_pages}"
Expand Down Expand Up @@ -258,10 +259,10 @@ def find_db_updates(
.iterator()
}
updates: Updates = {}
for (db_path, ent_key, locale_id), tx in translations.items():
for (db_path, ent_key, locale_id), rt in translations.items():
entity_id = entities.get((db_path, ent_key), None)
if entity_id is not None:
updates[(entity_id, locale_id)] = tx
updates[(entity_id, locale_id)] = rt
log.debug(f"[{project.slug}] Compiling updates... Found {len(updates)}")
return updates

Expand Down Expand Up @@ -302,14 +303,14 @@ def update_db_translations(
# Approve matching suggestions
matching_suggestions_q = Q()
repo_rm_count = 0
for (entity_id, locale_id), (string, _) in repo_translations.items():
if string is None:
for (entity_id, locale_id), rt in repo_translations.items():
if rt is None:
# The translation has been removed from the repo
translations_to_reject |= Q(entity_id=entity_id, locale_id=locale_id)
repo_rm_count += 1
else:
matching_suggestions_q |= Q(
entity_id=entity_id, locale_id=locale_id, string=string
entity_id=entity_id, locale_id=locale_id, string=rt.string
)
# (entity_id, locale_id) => translation
suggestions: dict[tuple[int, int], Translation] = (
Expand All @@ -325,7 +326,8 @@ def update_db_translations(
update_fields: set[str] = set()
approve_count = 0
for tx in suggestions.values():
_, fuzzy = repo_translations[(tx.entity_id, tx.locale_id)]
rt = repo_translations[(tx.entity_id, tx.locale_id)]
fuzzy = rt.fuzzy if rt is not None else False
if fuzzy and tx.fuzzy:
# Keep fuzzy suggestions unchanged
continue
Expand Down Expand Up @@ -371,17 +373,24 @@ def update_db_translations(
new_translations: list[Translation] = []
if repo_translations:
# Add new approved translations for the remainder
for (entity_id, locale_id), (string, fuzzy) in repo_translations.items():
if string is not None:
for (entity_id, locale_id), rt in repo_translations.items():
if rt is not None:
json_properties = (
{key: message_to_json(msg) for key, msg in rt.properties.items()}
if rt.properties
else None
)
tx = Translation(
entity_id=entity_id,
locale_id=locale_id,
string=string,
string=rt.string,
value=message_to_json(rt.value),
properties=json_properties,
date=now,
active=True,
user=user,
)
if fuzzy:
if rt.fuzzy:
tx.fuzzy = True
else:
tx.approved = True
Expand Down
8 changes: 5 additions & 3 deletions pontoon/sync/formats/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

from pontoon.base.models import Entity

from .common import VCSTranslation
from .common import RepoTranslation
from .gettext import gettext_as_entity, gettext_as_translation
from .xliff import xliff_as_entity, xliff_as_translation

Expand Down Expand Up @@ -62,7 +62,7 @@ def _as_string(format: Format | None, entry: Entry[Message]) -> str:
return serialize_message(format, entry.value)


def as_vcs_translations(res: MozL10nResource[Message]) -> Iterator[VCSTranslation]:
def as_repo_translations(res: MozL10nResource[Message]) -> Iterator[RepoTranslation]:
for section in res.sections:
if res.format == Format.android and section.id:
continue
Expand All @@ -74,9 +74,11 @@ def as_vcs_translations(res: MozL10nResource[Message]) -> Iterator[VCSTranslatio
case Format.xliff:
tx = xliff_as_translation(section.id, entry)
case _:
tx = VCSTranslation(
tx = RepoTranslation(
key=section.id + entry.id,
string=_as_string(res.format, entry),
value=entry.value,
properties=entry.properties,
)
if tx is not None:
yield tx
Expand Down
6 changes: 5 additions & 1 deletion pontoon/sync/formats/common.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,16 @@
from dataclasses import dataclass

from moz.l10n.model import Message


@dataclass
class VCSTranslation:
class RepoTranslation:
"""
A single translation of a source string into another language.
"""

key: tuple[str, ...]
string: str
value: Message
properties: dict[str, Message] | None = None
fuzzy: bool = False
4 changes: 2 additions & 2 deletions pontoon/sync/formats/gettext.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,15 +14,15 @@

from pontoon.base.models import Entity

from .common import VCSTranslation
from .common import RepoTranslation


def gettext_as_translation(entry: Entry[Message]):
if entry.value.is_empty():
return None
string = serialize_message(Format.mf2, entry.value)
fuzzy = any(m.key == "flag" and m.value == "fuzzy" for m in entry.meta)
return VCSTranslation(key=entry.id, string=string, fuzzy=fuzzy)
return RepoTranslation(key=entry.id, string=string, value=entry.value, fuzzy=fuzzy)


def gettext_as_entity(entry: Entry[Message], kwargs: dict[str, Any]) -> Entity:
Expand Down
8 changes: 6 additions & 2 deletions pontoon/sync/formats/xliff.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,17 @@

from pontoon.base.models import Entity

from .common import VCSTranslation
from .common import RepoTranslation


def xliff_as_translation(section_id: L10nId, entry: Entry):
# Here, entry.value is from the <target>
string = unescape(serialize_message(Format.xliff, entry.value))
return VCSTranslation(key=section_id + entry.id, string=string) if string else None
return (
RepoTranslation(key=section_id + entry.id, string=string, value=entry.value)
if string
else None
)


def xliff_as_entity(
Expand Down
Loading