Skip to content

Commit b32159a

Browse files
committed
port/azerbeijan
MycroftAI#212
1 parent 8e98923 commit b32159a

19 files changed

+3135
-3
lines changed

lingua_franca/internal.py

+4-3
Original file line numberDiff line numberDiff line change
@@ -9,16 +9,17 @@
99
from lingua_franca.time import to_local
1010

1111

12-
_SUPPORTED_LANGUAGES = ("ca", "cs", "da", "de", "en", "es", "fr", "hu",
12+
_SUPPORTED_LANGUAGES = ("az", "ca", "cs", "da", "de", "en", "es", "fr", "hu",
1313
"it", "nl", "pl", "pt", "ru", "sl", "sv", "fa")
1414

15-
_SUPPORTED_FULL_LOCALIZATIONS = ("ca-es", "cs-cz", "da-dk", "de-de",
15+
_SUPPORTED_FULL_LOCALIZATIONS = ("az-az", "ca-es", "cs-cz", "da-dk", "de-de",
1616
"en-au", "en-us", "es-es", "fr-fr",
1717
"hu-hu", "it-it", "nl-nl", "pl-pl",
1818
"fa-ir", "pt-pt", "ru-ru", "sl-si",
1919
"sv-se", "tr-tr")
2020

21-
_DEFAULT_FULL_LANG_CODES = {'ca': 'ca-es',
21+
_DEFAULT_FULL_LANG_CODES = {'az': 'az-az',
22+
'ca': 'ca-es',
2223
'cs': 'cs-cz',
2324
'da': 'da-dk',
2425
'de': 'de-de',

lingua_franca/lang/common_data_az.py

+296
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,296 @@
1+
#
2+
# Copyright 2021 Mycroft AI Inc.
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
#
16+
from collections import OrderedDict
17+
from .parse_common import invert_dict
18+
19+
_FUNCTION_NOT_IMPLEMENTED_WARNING = "Tələb olunan funksiya Azərbaycan dilində yerinə yetirilmir."
20+
21+
_NUM_STRING_AZ = {
22+
0: 'sıfır',
23+
1: 'bir',
24+
2: 'iki',
25+
3: 'üç',
26+
4: 'dörd',
27+
5: 'beş',
28+
6: 'altı',
29+
7: 'yeddi',
30+
8: 'səkkiz',
31+
9: 'doqquz',
32+
10: 'on',
33+
11: 'on bir',
34+
12: 'on iki',
35+
13: 'on üç',
36+
14: 'on dörd',
37+
15: 'on beş',
38+
16: 'on altı',
39+
17: 'on yeddi',
40+
18: 'on səkkiz',
41+
19: 'on doqquz',
42+
20: 'iyirmi',
43+
30: 'otuz',
44+
40: 'qırx',
45+
50: 'əlli',
46+
60: 'altmış',
47+
70: 'yetmiş',
48+
80: 'səksən',
49+
90: 'doxsan'
50+
}
51+
52+
_FRACTION_STRING_AZ = {
53+
2: 'ikidə',
54+
3: 'üçdə',
55+
4: 'dörddə',
56+
5: 'beşdə',
57+
6: 'altıda',
58+
7: 'yeddidə',
59+
8: 'səkkizdə',
60+
9: 'doqquzda',
61+
10: 'onda',
62+
11: 'on birdə',
63+
12: 'on ikidə',
64+
13: 'on üçdə',
65+
14: 'on dörddə',
66+
15: 'on beşdə',
67+
16: 'on altıda',
68+
17: 'on yeddidə',
69+
18: 'on səkkizdə',
70+
19: 'on doqquzda',
71+
20: 'iyirmidə',
72+
30: 'otuzda',
73+
40: 'qırxda',
74+
50: 'əllidə',
75+
60: 'altmışda',
76+
70: 'yetmişdə',
77+
80: 'səksəndə',
78+
90: 'doxsanda',
79+
1e2: 'yüzdə',
80+
1e3: 'mində'
81+
}
82+
83+
84+
_LONG_SCALE_AZ = OrderedDict([
85+
(100, 'yüz'),
86+
(1000, 'min'),
87+
(1000000, 'milyon'),
88+
(1e12, "milyard"),
89+
(1e18, 'trilyon'),
90+
(1e24, "kvadrilyon"),
91+
(1e30, "kvintilyon"),
92+
(1e36, "sekstilyon"),
93+
(1e42, "septilyon"),
94+
(1e48, "oktilyon"),
95+
(1e54, "nonilyon"),
96+
(1e60, "dekilyon")
97+
])
98+
99+
100+
_SHORT_SCALE_AZ = OrderedDict([
101+
(100, 'yüz'),
102+
(1000, 'min'),
103+
(1000000, 'milyon'),
104+
(1e9, "milyard"),
105+
(1e12, 'trilyon'),
106+
(1e15, "kvadrilyon"),
107+
(1e18, "kvintilyon"),
108+
(1e21, "sekstilyon"),
109+
(1e24, "septilyon"),
110+
(1e27, "oktilyon"),
111+
(1e30, "nonilyon"),
112+
(1e33, "dekilyon")
113+
])
114+
115+
_ORDINAL_BASE_AZ = {
116+
1: 'birinci',
117+
2: 'ikinci',
118+
3: 'üçüncü',
119+
4: 'dördüncü',
120+
5: 'beşinci',
121+
6: 'altıncı',
122+
7: 'yeddinci',
123+
8: 'səkkizinci',
124+
9: 'doqquzuncu',
125+
10: 'onuncu',
126+
11: 'on birinci',
127+
12: 'on ikinci',
128+
13: 'on üçüncü',
129+
14: 'on dördüncü',
130+
15: 'on beşinci',
131+
16: 'on altıncı',
132+
17: 'on yeddinci',
133+
18: 'on səkkizinci',
134+
19: 'on doqquzuncu',
135+
20: 'iyirminci',
136+
30: 'otuzuncu',
137+
40: "qırxıncı",
138+
50: "əllinci",
139+
60: "altmışıncı",
140+
70: "yetmışinci",
141+
80: "səksəninci",
142+
90: "doxsanınçı",
143+
1e2: "yüzüncü",
144+
1e3: "mininci"
145+
}
146+
147+
_SHORT_ORDINAL_AZ = {
148+
1e6: "milyonuncu",
149+
1e9: "milyardıncı",
150+
1e12: "trilyonuncu",
151+
1e15: "kvadrilyonuncu",
152+
1e18: "kvintilyonuncu",
153+
1e21: "sekstilyonuncu",
154+
1e24: "septilyonuncu",
155+
1e27: "oktilyonuncu",
156+
1e30: "nonilyonuncu",
157+
1e33: "dekilyonuncu"
158+
# TODO > 1e-33
159+
}
160+
_SHORT_ORDINAL_AZ.update(_ORDINAL_BASE_AZ)
161+
162+
163+
_LONG_ORDINAL_AZ = {
164+
1e6: "milyonuncu",
165+
1e12: "milyardıncı",
166+
1e18: "trilyonuncu",
167+
1e24: "kvadrilyonuncu",
168+
1e30: "kvintilyonuncu",
169+
1e36: "sekstilyonuncu",
170+
1e42: "septilyonuncu",
171+
1e48: "oktilyonuncu",
172+
1e54: "nonilyonuncu",
173+
1e60: "dekilyonuncu"
174+
# TODO > 1e60
175+
}
176+
_LONG_ORDINAL_AZ.update(_ORDINAL_BASE_AZ)
177+
178+
179+
# negate next number (-2 = 0 - 2)
180+
_NEGATIVES_AZ = {"mənfi", "minus"}
181+
182+
# sum the next number (iyirmi iki = 20 + 2)
183+
_SUMS_AZ = {'on', '10', 'iyirmi', '20', 'otuz', '30', 'qırx', '40', 'əlli', '50',
184+
'altmış', '60', 'yetmiş', '70', 'səksən', '80', 'doxsan', '90'}
185+
186+
_HARD_VOWELS = ['a', 'ı', 'o', 'u']
187+
_SOFT_VOWELS = ['e', 'ə', 'i', 'ö', 'ü']
188+
_VOWELS = _HARD_VOWELS + _SOFT_VOWELS
189+
190+
def _get_last_vowel(word):
191+
is_last = True
192+
for char in word[::-1]:
193+
if char in _VOWELS:
194+
return char, is_last
195+
is_last = False
196+
197+
return "", is_last
198+
199+
def _last_vowel_type(word):
200+
return _get_last_vowel(word)[0] in _HARD_VOWELS
201+
202+
def _get_ordinal_ak(word):
203+
last_vowel, is_last = _get_last_vowel(word)
204+
if not last_vowel:
205+
return ""
206+
207+
if last_vowel in ["a", "ı"]:
208+
if is_last:
209+
return "ncı"
210+
return "ıncı"
211+
212+
if last_vowel == ["e", "ə", "i"]:
213+
if is_last:
214+
return "nci"
215+
return "inci"
216+
217+
if last_vowel in ["o", "u"]:
218+
if is_last:
219+
return "ncu"
220+
return "uncu"
221+
222+
if last_vowel == ["ö", "ü"]:
223+
if is_last:
224+
return "ncü"
225+
return "üncü"
226+
227+
def _get_full_time_ak(hour):
228+
if hour in [1, 3, 4, 5, 8, 11]:
229+
return "ə"
230+
if hour in [2, 7, 12]:
231+
return "yə"
232+
if hour in [9, 10]:
233+
return "a"
234+
return "ya"
235+
236+
def _get_half_time_ak(hour):
237+
if hour in [1, 5, 8, 11]:
238+
return "in"
239+
if hour in [2, 7, 12]:
240+
return "nin"
241+
if hour in [3, 4]:
242+
return "ün"
243+
if hour in [9, 10]:
244+
return "un"
245+
return "nın"
246+
247+
def _get_daytime(hour):
248+
if hour < 6:
249+
return "gecə"
250+
if hour < 12:
251+
return "səhər"
252+
if hour < 18:
253+
return "gündüz"
254+
return "axşam"
255+
256+
def _generate_plurals_az(originals):
    """
    Build the plural forms of the given words.

    The plural is formed by appending 'lar' when the word's last vowel is a
    hard vowel and 'lər' otherwise, as decided by _last_vowel_type.

    Args:
        originals set(str) or dict(str, any): values to pluralize

    Returns:
        set(str) or dict(str, any): for a dict, the pluralized keys mapped to
            the original values; for any other iterable, a set of the
            pluralized items.
    """
    def _pluralize(word):
        suffix = 'lar' if _last_vowel_type(word) else 'lər'
        return word + suffix

    if not isinstance(originals, dict):
        return {_pluralize(word) for word in originals}
    return {_pluralize(word): value for word, value in originals.items()}
273+
274+
275+
# Sets of the scale words (yüz, min, milyon, ...) for each scale.
# NOTE(review): only the singular scale words are included; if plural forms
# are wanted, _generate_plurals_az could supply them -- confirm intent.
_MULTIPLIES_LONG_SCALE_AZ = set(_LONG_SCALE_AZ.values())

_MULTIPLIES_SHORT_SCALE_AZ = set(_SHORT_SCALE_AZ.values())
280+
281+
# Word that splits a sentence into parts which are parsed separately and
# summed ( 2 and a half = 2 + 0.5 )
_FRACTION_MARKER_AZ = {"və"}

# Word that separates the integer part from the decimals
# ( 1 nöqtə 5 = 1 + 0.5 )
_DECIMAL_MARKER_AZ = {"nöqtə"}

# Reverse lookup built from the cardinal table.
_STRING_NUM_AZ = invert_dict(_NUM_STRING_AZ)

# Single-word spoken fractions and their numeric value.
_SPOKEN_EXTRA_NUM_AZ = {
    "yarım": 0.5,
    "üçdəbir": 1 / 3,
    "dörddəbir": 1 / 4,
}

# Reverse lookups built from the ordinal tables.
_STRING_SHORT_ORDINAL_AZ = invert_dict(_SHORT_ORDINAL_AZ)
_STRING_LONG_ORDINAL_AZ = invert_dict(_LONG_ORDINAL_AZ)

0 commit comments

Comments
 (0)