@@ -68,74 +68,95 @@ Determining word stresses, syllables, grammar form from word.
6868from phonology_engine import PhonologyEngine
6969from pprint import pprint
7070pe = PhonologyEngine()
71- res = pe.process('31 kačiukas perbėgo kelią.', include_syllables=True)
72- pprint(res)
71+ res = pe.process('31 kačiukas perbėgo kelią.', include_syllables=True)
72+ for word_details, phrase, normalized_phrase, letter_map in res:
73+ pprint(word_details)
7374```
7475This would result in the following output:
7576```
76- ('.',
77- [('',
78- [[{'ascii_stressed_word': 'TRI`-SDE-ŠIMT',
79- 'number_stressed_word': 'TRI0-SDE-ŠIMT',
80- 'stress_options': {'decoded_options': [{'rule': 'Nekaitomas žodis'}],
81- 'options': [(2, 0, 1, 1688)],
82- 'selected_index': 0},
83- 'syllables': [0, 3, 6],
84- 'utf8_stressed_word': 'TRÌ-SDE-ŠIMT',
85- 'word': 'TRI-SDE-ŠIMT'},
86- {'ascii_stressed_word': 'VI^E-NAS',
87- 'number_stressed_word': 'VI1E-NAS',
88- 'stress_options': {'decoded_options': [{'grammatical_case': 'Vardininkas',
89- 'number': 'vienaskaita',
90- 'rule': 'Linksnis ir kamieno '
91- 'tipas',
92- 'stem_type': 16,
93- 'stress_type': 1,
94- 'stressed_letter_index': 1}],
95- 'options': [(1, 1, 2, 4096)],
96- 'selected_index': 0},
97- 'syllables': [0, 3],
98- 'utf8_stressed_word': 'VÍE-NAS',
99- 'word': 'VIE-NAS'},
100- {'ascii_stressed_word': 'KA-ČIU`-KAS',
101- 'number_stressed_word': 'KA-ČIU0-KAS',
102- 'stress_options': {'decoded_options': [{'grammatical_case': 'Vardininkas',
103- 'number': 'vienaskaita',
104- 'rule': 'Linksnis ir kamieno '
105- 'tipas',
106- 'stem_type': 0,
107- 'stress_type': 0,
108- 'stressed_letter_index': 4}],
109- 'options': [(4, 0, 2, 0)],
110- 'selected_index': 0},
111- 'syllables': [0, 2, 5],
112- 'utf8_stressed_word': 'KA-ČIÙ-KAS',
113- 'word': 'KA-ČIU-KAS'},
114- {'ascii_stressed_word': 'PE^R-BĖ-GO',
115- 'number_stressed_word': 'PE1R-BĖ-GO',
116- 'stress_options': {'decoded_options': [{'rule': 'Veiksmazodžių kamienas '
117- 'ir galune (taisytina)'}],
118- 'options': [(1, 1, 0, 465)],
119- 'selected_index': 0},
120- 'syllables': [0, 3, 5],
121- 'utf8_stressed_word': 'PÉR-BĖ-GO',
122- 'word': 'PER-BĖ-GO'},
123- {'ascii_stressed_word': 'KE~-LIĄ',
124- 'number_stressed_word': 'KE2-LIĄ',
125- 'stress_options': {'decoded_options': [{'grammatical_case': 'Galininkas',
126- 'number': 'vienaskaita',
127- 'rule': 'Linksnis ir kamieno '
128- 'tipas',
129- 'stem_type': 2,
130- 'stress_type': 2,
131- 'stressed_letter_index': 1}],
132- 'options': [(1, 2, 2, 515)],
133- 'selected_index': 0},
134- 'syllables': [0, 2],
135- 'utf8_stressed_word': 'KẼ-LIĄ',
136- 'word': 'KE-LIĄ'}]],
137- ['TRISDEŠIMT VIENAS KAČIUKAS PERBĖGO KELIĄ']),
138- ''])
77+ ...
78+ [{'ascii_stressed_word': 'TRI`-SDE-ŠIMT',
79+ 'number_stressed_word': 'TRI0-SDE-ŠIMT',
80+ 'span_normalized': (0, 10),
81+ 'span_source': (0, 2),
82+ 'stress_options': {'decoded_options': [{'rule': 'Nekaitomas žodis'}],
83+ 'options': [(2, 0, 1, 1688)],
84+ 'selected_index': 0},
85+ 'syllables': [0, 3, 6],
86+ 'utf8_stressed_word': 'TRÌ-SDE-ŠIMT',
87+ 'word': 'TRISDEŠIMT',
88+ 'word_with_all_numeric_stresses': 'TRI0-SDE-ŠIMT',
89+ 'word_with_only_multiple_numeric_stresses': 'TRI-SDE-ŠIMT',
90+ 'word_with_syllables': 'TRI-SDE-ŠIMT'},
91+ {'ascii_stressed_word': 'VI^E-NAS',
92+ 'number_stressed_word': 'VI1E-NAS',
93+ 'span_normalized': (11, 17),
94+ 'span_source': (0, 2),
95+ 'stress_options': {'decoded_options': [{'grammatical_case': 'Vardininkas',
96+ 'number': 'vienaskaita',
97+ 'rule': 'Linksnis ir kamieno tipas',
98+ 'stem_type': 16,
99+ 'stress_type': 1,
100+ 'stressed_letter_index': 1}],
101+ 'options': [(1, 1, 2, 4096)],
102+ 'selected_index': 0},
103+ 'syllables': [0, 3],
104+ 'utf8_stressed_word': 'VÍE-NAS',
105+ 'word': 'VIENAS',
106+ 'word_with_all_numeric_stresses': 'VI1E-NAS',
107+ 'word_with_only_multiple_numeric_stresses': 'VIE-NAS',
108+ 'word_with_syllables': 'VIE-NAS'},
109+ {'ascii_stressed_word': 'KA-ČIU`-KAS',
110+ 'number_stressed_word': 'KA-ČIU0-KAS',
111+ 'span_normalized': (18, 26),
112+ 'span_source': (3, 11),
113+ 'stress_options': {'decoded_options': [{'grammatical_case': 'Vardininkas',
114+ 'number': 'vienaskaita',
115+ 'rule': 'Linksnis ir kamieno tipas',
116+ 'stem_type': 0,
117+ 'stress_type': 0,
118+ 'stressed_letter_index': 4}],
119+ 'options': [(4, 0, 2, 0)],
120+ 'selected_index': 0},
121+ 'syllables': [0, 2, 5],
122+ 'utf8_stressed_word': 'KA-ČIÙ-KAS',
123+ 'word': 'KAČIUKAS',
124+ 'word_with_all_numeric_stresses': 'KA-ČIU0-KAS',
125+ 'word_with_only_multiple_numeric_stresses': 'KA-ČIU-KAS',
126+ 'word_with_syllables': 'KA-ČIU-KAS'},
127+ {'ascii_stressed_word': 'PE^R-BĖ-GO',
128+ 'number_stressed_word': 'PE1R-BĖ-GO',
129+ 'span_normalized': (27, 34),
130+ 'span_source': (12, 19),
131+ 'stress_options': {'decoded_options': [{'rule': 'Veiksmazodžių kamienas ir '
132+ 'galune (taisytina)'}],
133+ 'options': [(1, 1, 0, 465)],
134+ 'selected_index': 0},
135+ 'syllables': [0, 3, 5],
136+ 'utf8_stressed_word': 'PÉR-BĖ-GO',
137+ 'word': 'PERBĖGO',
138+ 'word_with_all_numeric_stresses': 'PE1R-BĖ-GO',
139+ 'word_with_only_multiple_numeric_stresses': 'PER-BĖ-GO',
140+ 'word_with_syllables': 'PER-BĖ-GO'},
141+ {'ascii_stressed_word': 'KE~-LIĄ',
142+ 'number_stressed_word': 'KE2-LIĄ',
143+ 'span_normalized': (35, 40),
144+ 'span_source': (20, 25),
145+ 'stress_options': {'decoded_options': [{'grammatical_case': 'Galininkas',
146+ 'number': 'vienaskaita',
147+ 'rule': 'Linksnis ir kamieno tipas',
148+ 'stem_type': 2,
149+ 'stress_type': 2,
150+ 'stressed_letter_index': 1}],
151+ 'options': [(1, 2, 2, 515)],
152+ 'selected_index': 0},
153+ 'syllables': [0, 2],
154+ 'utf8_stressed_word': 'KẼ-LIĄ',
155+ 'word': 'KELIĄ',
156+ 'word_with_all_numeric_stresses': 'KE2-LIĄ',
157+ 'word_with_only_multiple_numeric_stresses': 'KE-LIĄ',
158+ 'word_with_syllables': 'KE-LIĄ'}]
159+
139160```
140161
141162# References
0 commit comments