Skip to content

Commit 0edee22

Browse files
authored
Add word span info to results (#9)
* build refactoring * extend API to get phrase letter offset arrays * export symbol fix
1 parent 1fcbb80 commit 0edee22

File tree

11 files changed

+82
-18
lines changed

11 files changed

+82
-18
lines changed

Build.ps1

Lines changed: 2 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -37,19 +37,12 @@ if ($build) {
3737
} else {
3838

3939
# Build native linux libraries
40-
41-
sudo apt-get install -y libc6-dev-i386 gcc-multilib g++-multilib
42-
4340
cd native/source
4441

45-
mkdir -p ../../phonology_engine/Linux_x86
46-
g++ -fPIC -m32 -g -I ../include/ Transkr.cpp transcrLUSS.cpp Skiemen.cpp Kircdb.cpp fv2id.cpp ArKirciuoti.cpp Engine.cpp strtokf.cpp stringWithLetterPosition.cpp TextNormalization.cpp -shared -o ../../phonology_engine/Linux_x86/libPhonologyEngine.so -Wno-write-strings
47-
48-
mkdir -p ../../phonology_engine/Linux_x86_64
49-
g++ -fPIC -g -I ../include/ Transkr.cpp transcrLUSS.cpp Skiemen.cpp Kircdb.cpp fv2id.cpp ArKirciuoti.cpp Engine.cpp strtokf.cpp stringWithLetterPosition.cpp TextNormalization.cpp -shared -o ../../phonology_engine/Linux_x86_64/libPhonologyEngine.so -Wno-write-strings
42+
make
5043

5144
cd ../..
52-
45+
5346
}
5447

5548
python setup.py build

native/include/Engine.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ EXPORT Result PhonologyEngineNormalizeText(char * szText, NormalizedTextHandle *
2525
EXPORT Result PhonologyEngineNormalizedTextFree(NormalizedTextHandle * pHandle);
2626
EXPORT Result PhonologyEngineNormalizedTextGetPhraseCount(NormalizedTextHandle handle, int * pValue);
2727
EXPORT Result PhonologyEngineNormalizedTextGetPhrase(NormalizedTextHandle handle, int index, char ** pSzValue);
28+
EXPORT Result PhonologyEngineNormalizedTextGetPhraseLetterMap(NormalizedTextHandle handle, int index, int ** pArValue, int * count);
2829

2930
EXPORT Result PhonologyEngineProcessPhrase(char * szNormalizedText, PhonologyEngineOutputHandle * pHandle);
3031
EXPORT Result PhonologyEngineOutputFree(PhonologyEngineOutputHandle * pHandle);

native/source/.VS2017/PhonologyEngine.def

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,13 @@ EXPORTS
33
PhonologyEngineInit
44
PhonologyEngineProcessPhrase
55

6-
PhonologyEngineOutputFree
6+
PhonologyEngineNormalizeText
7+
PhonologyEngineNormalizedTextFree
8+
PhonologyEngineNormalizedTextGetPhraseCount
9+
PhonologyEngineNormalizedTextGetPhrase
10+
PhonologyEngineNormalizedTextGetPhraseLetterMap
711

12+
PhonologyEngineOutputFree
813
PhonologyEngineOutputGetWordCount
914
PhonologyEngineOutputGetWord
1015
PhonologyEngineOutputGetWordSyllables

native/source/Engine.cpp

Lines changed: 29 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -30,14 +30,14 @@ EXPORT Result PhonologyEngineNormalizeText(char * szText, NormalizedTextHandle *
3030
char *pos = szNormalizedText;
3131
char sakinys[200];
3232
char * pPhrases[1024];
33+
int * pPhraseLetterMap[1024];
3334
int n, phraseCount = 0;
3435

3536
while ((pos != (void*)1))
3637
{
3738
int hr2 = 0;
3839
int lp = (int)(pos - szNormalizedText);
3940
n = sscanf(pos, "%[^\n]", sakinys);
40-
pos = strchr(pos, '\n') + 1;
4141
if (n < 0)
4242
break;
4343
if (n == 0)
@@ -48,17 +48,23 @@ EXPORT Result PhonologyEngineNormalizeText(char * szText, NormalizedTextHandle *
4848
sakinys[0] = 0; //jei eilute tuscia arba tik neskaitytini simboliai
4949

5050
pPhrases[phraseCount] = (char*) malloc((strlen(sakinys) + 1) * sizeof(char));
51-
if (!pPhrases[phraseCount]) return -3;
51+
pPhraseLetterMap[phraseCount] = (int*) malloc((strlen(sakinys)) * sizeof(int));
52+
if (!pPhrases[phraseCount] || !pPhraseLetterMap[phraseCount]) return -3;
5253
strcpy(pPhrases[phraseCount], sakinys);
54+
memcpy(pPhraseLetterMap[phraseCount], pArLetterPosistion + lp, strlen(sakinys) * sizeof(int));
55+
56+
pos = strchr(pos, '\n') + 1;
5357
phraseCount++;
5458
}
5559

5660
pNormalizedText->phraseCount = phraseCount;
5761
pNormalizedText->pSzPhrases = (char**) malloc(sizeof(char*) * phraseCount);
62+
pNormalizedText->pArLetterMap = (int**) malloc(sizeof(int*) * phraseCount);
5863

59-
if (!pNormalizedText->pSzPhrases) return -4;
64+
if (!pNormalizedText->pSzPhrases || !pNormalizedText->pArLetterMap) return -4;
6065

6166
memcpy(pNormalizedText->pSzPhrases, pPhrases, sizeof(char*) * phraseCount);
67+
memcpy(pNormalizedText->pArLetterMap, pPhraseLetterMap, sizeof(int*) * phraseCount);
6268

6369
*pHandle = pNormalizedText;
6470

@@ -79,10 +85,12 @@ EXPORT Result PhonologyEngineNormalizedTextFree(NormalizedTextHandle * pHandle)
7985
for (int i = 0; i < pNormalizedText->phraseCount; i++)
8086
{
8187
free(pNormalizedText->pSzPhrases[i]);
88+
free(pNormalizedText->pArLetterMap[i]);
8289
}
8390
}
8491

8592
free(pNormalizedText->pSzPhrases);
93+
free(pNormalizedText->pArLetterMap);
8694
free(pNormalizedText);
8795
*pHandle = NULL;
8896

@@ -117,6 +125,24 @@ EXPORT Result PhonologyEngineNormalizedTextGetPhrase(NormalizedTextHandle handle
117125
return 0;
118126
}
119127

128+
EXPORT Result PhonologyEngineNormalizedTextGetPhraseLetterMap(NormalizedTextHandle handle, int index, int ** pArValue, int * pCount)
129+
{
130+
NormalizedText * pOutput = GetObjectPtr(NormalizedText, handle);
131+
132+
if (!handle) return -1;
133+
if (!pArValue) return -2;
134+
if (!pCount) return -3;
135+
136+
int count = 0;
137+
138+
if (index < 0 || index >= pOutput->phraseCount) return -4;
139+
140+
*pArValue = pOutput->pArLetterMap[index];
141+
*pCount = strlen(pOutput->pSzPhrases[index]);
142+
143+
return 0;
144+
}
145+
120146
////////////Pagrindine sintezavimo funkcija////////////////////////////////////////////////////////////////////////////////
121147
EXPORT Result PhonologyEngineProcessPhrase(char * szNormalizedText, PhonologyEngineOutputHandle * pHandle)
122148
{

native/source/Makefile

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
all: install_dependencies compile_x86 compile_x86_64
2+
3+
install_dependencies:
4+
sudo apt-get install -y libc6-dev-i386 gcc-multilib g++-multilib
5+
6+
compile_x86:
7+
mkdir -p ../../phonology_engine/Linux_x86
8+
g++ -fPIC -m32 -g -I ../include/ Transkr.cpp transcrLUSS.cpp Skiemen.cpp Kircdb.cpp fv2id.cpp ArKirciuoti.cpp Engine.cpp strtokf.cpp stringWithLetterPosition.cpp TextNormalization.cpp -shared -o ../../phonology_engine/Linux_x86/libPhonologyEngine.so -Wno-write-strings
9+
10+
compile_x86_64:
11+
mkdir -p ../../phonology_engine/Linux_x86_64
12+
g++ -fPIC -g -I ../include/ Transkr.cpp transcrLUSS.cpp Skiemen.cpp Kircdb.cpp fv2id.cpp ArKirciuoti.cpp Engine.cpp strtokf.cpp stringWithLetterPosition.cpp TextNormalization.cpp -shared -o ../../phonology_engine/Linux_x86_64/libPhonologyEngine.so -Wno-write-strings
13+
14+
clean:
15+
rm ../../phonology_engine/Linux_x86/libPhonologyEngine.so
16+
rm ../../phonology_engine/Linux_x86_64/libPhonologyEngine.so

native/source/transcrLUSSInternal.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ extern "C" {
1616
typedef struct _NormalizedText {
1717
int phraseCount;
1818
char ** pSzPhrases;
19+
int ** pArLetterMap;
1920
} NormalizedText;
2021

2122
typedef struct _WordStressOptions {

native/test/LithUSStest.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ int main(int argc, char *argv[])
4545
int pastrSk;
4646

4747
int hr = 0;
48-
if ((df = fopen("duom.txt", "rb")) == NULL)
48+
if ((df = fopen("../tests/duom.txt", "rb")) == NULL)
4949
{
5050
spausdinti_loga("Nepavyko atidaryti duom.txt failo");
5151
hr = -1;

phonology_engine/pe_native.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,9 @@
5454
_PhonologyEngineNormalizedTextGetPhrase = _PhonologyEngineLibrary.PhonologyEngineNormalizedTextGetPhrase#(NormalizedTextHandle handle, int index, char ** pSzValue);
5555
_PhonologyEngineNormalizedTextGetPhrase.argtypes = [c_void_p, c_int, POINTER(c_char_p)]
5656

57+
_PhonologyEngineNormalizedTextGetPhraseLetterMap = _PhonologyEngineLibrary.PhonologyEngineNormalizedTextGetPhraseLetterMap#(NormalizedTextHandle handle, int index, int ** pArValue, int * pCount)
58+
_PhonologyEngineNormalizedTextGetPhraseLetterMap.argtypes = [c_void_p, c_int, POINTER(POINTER(c_int)), POINTER(c_int)]
59+
5760
_PhonologyEngineProcessPhrase = _PhonologyEngineLibrary.PhonologyEngineProcessPhrase#(char * szNormalizedText, PhonologyEngineOutputHandle * pHandle);
5861
_PhonologyEngineProcessPhrase.argtypes = [c_char_p, POINTER(c_void_p)]
5962

@@ -111,6 +114,14 @@ def phonology_engine_normalized_text_get_phrase(handle, index):
111114
return ''
112115
return cs.value.decode(_native_encoding)
113116

117+
def phonology_engine_normalized_text_get_phrase_letter_map(handle, index):
118+
ci = POINTER(c_int)()
119+
c = c_int(0)
120+
121+
_check( _PhonologyEngineNormalizedTextGetPhraseLetterMap( handle, c_int(index), byref(ci), byref(c) ) )
122+
123+
return [ci[i] for i in range(c.value)]
124+
114125
def phonology_engine_process_phrase(text):
115126

116127
handle = c_void_p()

phonology_engine/pe_output.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ def next(self): # Python 2
3434
raise StopIteration
3535
else:
3636
value = self.get_phrase(self.current)
37+
letter_map = self.get_phrase_letter_map(self.current)
3738

3839
if self.remove_stress_chars:
3940
for c in _stress_ascii_chars:
@@ -44,13 +45,16 @@ def next(self): # Python 2
4445
value = value.replace(c, '')
4546

4647
self.current += 1
47-
return value
48+
return value, letter_map
4849

4950
def get_phrase_count(self):
5051
return pe_native.phonology_engine_normalized_text_get_phrase_count(self.handle)
5152

5253
def get_phrase(self, index):
5354
return pe_native.phonology_engine_normalized_text_get_phrase(self.handle, index)
55+
56+
def get_phrase_letter_map(self, index):
57+
return pe_native.phonology_engine_normalized_text_get_phrase_letter_map(self.handle, index)
5458

5559
class PhonologyEngineOutput:
5660
def __init__(self, handle):

phonology_engine/phonology_engine.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -30,11 +30,14 @@ def _process(self, text, separators, normalize=True, include_syllables=True, nor
3030
processed_phrases = []
3131

3232
if normalize_only:
33-
return ('', [' '.join(normalized_phrases)])
33+
return ('', [' '.join([p for p,_ in normalized_phrases])])
3434
else:
35-
for phrase in normalized_phrases:
35+
r = []
36+
for phrase, letter_map in normalized_phrases:
3637
processed_phrases.append(self._process(text=phrase, separators=separators, normalize=False, include_syllables=include_syllables, normalize_only=normalize_only))
37-
return ('', processed_phrases, list(normalized_phrases))
38+
r.append((phrase, letter_map))
39+
40+
return ('', processed_phrases, r)
3841

3942
else:
4043
if len(text) > _max_prase_length:

0 commit comments

Comments
 (0)