Skip to content

Commit 743d62f

Browse files
functionzz and mathjazz authored
Pretranslation API (#3834)
This PR implements a Pretranslation API at api/v2/pretranslate/?resource_format=FORMAT&locale=CODE. It utilizes existing pretranslation functionality in Pontoon and extends it as an authenticated service for those who need a pretranslation from either Translation Memory or AutoML. Co-authored-by: Matjaž Horvat <[email protected]>
1 parent 46bdc00 commit 743d62f

File tree

8 files changed

+422
-2
lines changed

8 files changed

+422
-2
lines changed

docs/admin/deployment.rst

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -267,6 +267,14 @@ you create:
267267
Optional. Set your `OpenAI API` key to add the ability to refine machine
268268
translations using ChatGPT.
269269

270+
``PERSONAL_ACCESS_TOKEN_MAX_COUNT``
271+
Optional. The maximum number of personal access tokens a user can create.
272+
The default value is 10.
273+
274+
``PRETRANSLATION_API_MAX_CHARS``
275+
Optional. The maximum number of characters of input text accepted by the pretranslation API.
276+
The default value is 2048.
277+
270278
``PROJECT_MANAGERS``
271279
Optional. A list of project manager email addresses to send project requests to
272280

pontoon/api/authentication.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,18 @@
11
from rest_framework.authentication import BaseAuthentication
22
from rest_framework.exceptions import AuthenticationFailed
3+
from rest_framework.permissions import BasePermission
34

45
from django.contrib.auth.hashers import check_password
56
from django.utils import timezone
67

78
from pontoon.api.models import PersonalAccessToken
89

910

11+
class IsPretranslator(BasePermission):
    """Grant access only to users who belong to the "pretranslators" group."""

    def has_permission(self, request, view):
        """Return True if the requesting user is in the "pretranslators" group.

        Requests without an authenticated user are refused before the group
        lookup, so a missing user (``request.user`` can be ``None`` when the
        ``UNAUTHENTICATED_USER`` setting is ``None``) yields a denial
        rather than an ``AttributeError``.
        """
        user = request.user
        if not (user and user.is_authenticated):
            return False
        return user.groups.filter(name="pretranslators").exists()
14+
15+
1016
class PersonalAccessTokenAuthentication(BaseAuthentication):
1117
def authenticate(self, request):
1218
auth_header = request.headers.get("Authorization")

pontoon/api/tests/test_views.py

Lines changed: 322 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,12 @@
22

33
from rest_framework.test import APIClient
44

5+
from django.contrib.auth.hashers import make_password
6+
from django.contrib.auth.models import Group
57
from django.db.models import Prefetch
8+
from django.utils.timezone import now, timedelta
69

10+
from pontoon.api.models import PersonalAccessToken
711
from pontoon.base.models.locale import Locale
812
from pontoon.base.models.project import Project
913
from pontoon.base.models.project_locale import ProjectLocale
@@ -1483,3 +1487,321 @@ def test_translation_search(django_assert_num_queries):
14831487
},
14841488
},
14851489
]
1490+
1491+
1492+
@pytest.mark.django_db
def test_pretranslation_group_authentication(member):
    """A valid token whose owner is not in "pretranslators" gets a 403."""
    raw_secret = "unhashed-token"

    # The user is in a group, just not the one the endpoint requires.
    unrelated_group = Group.objects.create(name="dummies")
    member.user.groups.add(unrelated_group)

    access_token = PersonalAccessToken.objects.create(
        user=member.user,
        name="Test Token 1",
        token_hash="hashed_token",
        expires_at=now() + timedelta(days=1),
    )
    # Swap the placeholder hash for the real hash of the secret sent below.
    access_token.token_hash = make_password(raw_secret)
    access_token.save()

    auth_headers = {"Authorization": f"Bearer {access_token.id}_{raw_secret}"}

    # test no pretranslators group
    response = APIClient().post(
        "/api/v2/pretranslate/",
        HTTP_ACCEPT="application/json",
        headers=auth_headers,
    )

    assert response.status_code == 403
    assert response.data == {
        "detail": "You do not have permission to perform this action."
    }
1519+
1520+
1521+
@pytest.mark.django_db
def test_pretranslation_tm(member):
    """Check the pretranslation endpoint against Translation Memory entries.

    Covers request validation failures (missing parameters, undecodable,
    blank, empty and oversized text, unknown resource format) followed by
    TM-authored responses when no resource format is given and for the
    fluent, android and gettext resource formats.
    """
    pretranslators = Group.objects.get(name="pretranslators")
    member.user.groups.add(pretranslators)
    token = PersonalAccessToken.objects.create(
        user=member.user,
        name="Test Token 1",
        token_hash="hashed_token",
        expires_at=now() + timedelta(days=1),
    )
    token_id = token.id
    token_unhashed = "unhashed-token"
    token.token_hash = make_password(token_unhashed)
    token.save()

    locale_a = LocaleFactory(
        code="kg",
        name="Klingon",
    )
    project_a = ProjectFactory(
        slug="project_a",
        name="Project A",
        repositories=[],
    )
    resource_a = ResourceFactory.create(
        project=project_a,
        path=f"resource_{project_a.slug}.po",
        format="po",
    )
    entity_a = EntityFactory.create(
        string="Entity A",
        resource=resource_a,
    )
    locale_b = LocaleFactory(
        code="gs",
        name="Geonosian",
    )
    project_b = ProjectFactory(
        slug="project_b",
        name="Project B",
    )
    resource_b = ResourceFactory.create(
        project=project_b,
        path=f"resource_{project_b.slug}.ftl",
        format="fluent",
    )
    entity_b = EntityFactory.create(
        string="Entity B",
        resource=resource_b,
    )
    project_c = ProjectFactory(
        slug="project_c",
        name="Project C",
    )
    resource_c = ResourceFactory.create(
        project=project_c,
        path=f"resource_{project_c.slug}.ftl",
        format="android",
    )
    entity_c = EntityFactory.create(
        string="Entity C",
        resource=resource_c,
    )
    entity_d = EntityFactory.create(
        string="Entity D",
        resource=resource_c,
    )
    TranslationMemoryEntry.objects.create(
        source="Hello",
        target="Hola",
        locale=locale_a,
        project=project_a,
        entity=entity_a,
    )
    TranslationMemoryEntry.objects.create(
        source="{ -object-name } is a test",
        target="{ -object-name } es una prueba",
        locale=locale_a,
        project=project_b,
        entity=entity_b,
    )
    TranslationMemoryEntry.objects.create(
        source="Hello",
        target="Bonjour",
        locale=locale_b,
        project=project_b,
        entity=entity_b,
    )
    # NOTE(review): this entry pairs project_b with entity_c, which was
    # created under project_c's resource -- confirm this is intentional.
    TranslationMemoryEntry.objects.create(
        source="The page at %1$s says:",
        target="La página en %1$s dice:",
        locale=locale_b,
        project=project_b,
        entity=entity_c,
    )
    TranslationMemoryEntry.objects.create(
        source="Your app failed validation with {0} error.",
        target="La validación de tu app ha fallado con {0} error:",
        locale=locale_b,
        project=project_c,
        entity=entity_d,
    )

    auth_headers = {"Authorization": f"Bearer {token_id}_{token_unhashed}"}

    def post_request(query="", **body):
        """POST to the pretranslation endpoint using the test token.

        A fresh APIClient is used for every request; ``query`` is appended
        verbatim to the endpoint path and ``body`` is forwarded to ``post``.
        """
        return APIClient().post(
            f"/api/v2/pretranslate/{query}",
            HTTP_ACCEPT="application/json",
            headers=auth_headers,
            **body,
        )

    def post_text(query, text):
        """POST ``text`` as a text/plain request body."""
        return post_request(query, data=text, content_type="text/plain")

    # test no locale no text
    response = post_request()

    assert response.status_code == 400
    assert response.data == {
        "locale": ["This field is required."],
        "text": ["This field is required."],
    }

    # test corrupted input
    corrupted_data = b"\x80\x81\x82"  # Invalid UTF-8
    response = post_text("?locale=kg", corrupted_data)

    assert response.status_code == 400
    assert response.data == {
        "text": ["Unable to decode request body as UTF-8."],
    }

    # test string with spaces
    response = post_text("?locale=kg", " ")

    assert response.status_code == 400
    assert response.data == {
        "text": ["This field is required."],
    }

    # test empty string
    response = post_text("?locale=kg", "")

    assert response.status_code == 400
    assert response.data == {
        "text": ["This field is required."],
    }

    # test massive character payload
    large_char_data = "a" * 2049  # payload larger than 2048 characters
    response = post_text("?locale=kg", large_char_data)

    assert response.status_code == 400
    assert response.data == {
        "text": ["Text exceeds maximum length of 2048 characters."],
    }

    # test bad resource format
    response = post_text("?locale=kg&resource_format=blah", "Hello")

    assert response.status_code == 400
    assert response.data == {
        "resource_format": ["Choose a correct resource format."],
    }

    # test no resource format
    response = post_text("?locale=kg", "Hello")

    assert response.status_code == 200
    assert response.data == {
        "text": "Hola",
        "author": "tm",
    }

    # test fluent resource format
    response = post_text(
        "?locale=kg&resource_format=fluent",
        "testing-alias = { -object-name } is a test",
    )

    assert response.status_code == 200
    assert response.data == {
        "text": "testing-alias = { -object-name } es una prueba\n",
        "author": "tm",
    }

    # test incorrect format on fluent
    response = post_text(
        "?locale=kg&resource_format=fluent",
        "The page at %1$s says:",
    )

    assert response.status_code == 400

    # test android resource format
    response = post_text(
        "?locale=gs&resource_format=android",
        "The page at %1$s says:",
    )

    assert response.status_code == 200
    assert response.data == {
        "text": "La página en %1$s dice:",
        "author": "tm",
    }

    # test incorrect format on android
    response = post_text(
        "?locale=gs&resource_format=android",
        "testing-alias = { -object-name } is a test",
    )

    assert response.status_code == 400

    # test gettext resource format
    response = post_text(
        "?locale=gs&resource_format=gettext",
        "Your app failed validation with {0} error.",
    )

    assert response.status_code == 200
    assert response.data == {
        "text": "La validación de tu app ha fallado con \\{0\\} error:",
        "author": "tm",
    }

    # test incorrect format on gettext
    response = post_text(
        "?locale=gs&resource_format=gettext",
        "testing-alias = { -object-name } is a test",
    )

    assert response.status_code == 400
1805+
1806+
1807+
# Test Google AutoML

pontoon/api/urls.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,12 @@
4545
views.EntityIndividualView.as_view(),
4646
name="entity-individual-alternate",
4747
),
48+
path(
49+
# Pretranslation
50+
"pretranslate/",
51+
views.PretranslationView.as_view(),
52+
name="pretranslation",
53+
),
4854
path(
4955
# Terminology Search
5056
"search/terminology/",

0 commit comments

Comments
 (0)