-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathSentenceSplitter.bas
More file actions
194 lines (181 loc) · 6.09 KB
/
SentenceSplitter.bas
File metadata and controls
194 lines (181 loc) · 6.09 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
B4J=true
Group=Default Group
ModulesStructureVersion=1
Type=Class
Version=10
@EndOfDesignText@
Sub Class_Globals
	Private fx As JFX
	'Window of this dialog; created in Initialize.
	Private frm As Form
	'Language code received in Initialize and forwarded to the segmenter by SplitButton_MouseClicked.
	Private mSourceLang As String
	'Views of the dialog. They are never initialized in this class, so they are
	'presumably bound by the "sentenceSplitter" layout - confirm in the designer.
	Private SourceTextArea, TargetTextArea As TextArea
	Private CalculateTimeByWordsCheckBox As CheckBox
End Sub
'Initializes the dialog: remembers the source language, creates a 600x500 form
'with the event prefix "frm", loads the "sentenceSplitter" layout into its root
'pane and passes the form to Main.loc.LocalizeForm.
'sourceLang: language code stored in mSourceLang for later use.
Public Sub Initialize(sourceLang As String)
	mSourceLang = sourceLang
	frm.Initialize("frm", 600, 500)
	frm.RootPane.LoadLayout("sentenceSplitter")
	Main.loc.LocalizeForm(frm)
End Sub
'Fills the two text areas of the dialog.
'source: text placed in SourceTextArea.
'target: text placed in TargetTextArea.
Public Sub SetText(source As String, target As String)
	SourceTextArea.Text = source
	TargetTextArea.Text = target
End Sub
'Calls frm.ShowAndWait and, after that call returns, reports the state of the dialog.
'Returns a Map with three keys:
'  "source"           - text of SourceTextArea
'  "target"           - text of TargetTextArea
'  "calculateByWords" - checked state of CalculateTimeByWordsCheckBox
Public Sub ShowAndWait As Map
	frm.ShowAndWait
	Return CreateMap("source": SourceTextArea.Text, _
		"target": TargetTextArea.Text, _
		"calculateByWords": CalculateTimeByWordsCheckBox.Checked)
End Sub
'Shows or hides the "calculate time by words" checkbox.
'value: True makes CalculateTimeByWordsCheckBox visible, False hides it.
Public Sub SetCheckboxVisibility(value As Boolean)
	CalculateTimeByWordsCheckBox.Visible = value
End Sub
'Click handler of the okay button: closes the form.
'EventData is not used.
Private Sub OkayButton_MouseClicked (EventData As MouseEvent)
	frm.Close
End Sub
'Splits one timed line into several lines - one per line of source - and distributes
'the time span between startTime and endTime across them.
'startTime / endTime: time strings, converted with Utils.GetMillisecondsFromTimeString.
'lang: language code forwarded to GetSyllablesLength.
'source / target: texts split on the CRLF constant; lines are paired by index and a
'  source line without a matching target line gets an empty target.
'calculateTimeByWords: True  - a line's share of the span is proportional to the value
'                              GetSyllablesLength returns for that line;
'                      False - every line gets an equal share.
'Returns a List of Maps with the keys "source", "target", "startTime" and "endTime"
'(the time values are produced by Utils.GetTimeStringFromMilliseconds). The list is
'empty when splitting source yields no lines.
Public Sub GetNewLines(startTime As String,endTime As String,lang As String,source As String,target As String,calculateTimeByWords As Boolean) As List
	Dim lines As List
	lines.Initialize
	Dim startTimeMs As Long = Utils.GetMillisecondsFromTimeString(startTime)
	Dim endTimeMs As Long = Utils.GetMillisecondsFromTimeString(endTime)
	Dim duration As Long = endTimeMs - startTimeMs
	Dim sourceLines() As String = Regex.Split(CRLF,source)
	Dim targetLines() As String = Regex.Split(CRLF,target)
	Dim lineCount As Int = sourceLines.Length
	'Nothing to emit; returning here also keeps lineCount out of any division below.
	If lineCount = 0 Then Return lines

	'Weight of each line. The total is the sum of the per-line weights (not a count
	'taken on the whole text), so the shares always add up to the full span.
	Dim weights(lineCount) As Int
	Dim totalWeight As Long = 0
	For i = 0 To lineCount - 1
		If calculateTimeByWords Then
			weights(i) = GetSyllablesLength(lang,sourceLines(i))
		Else
			weights(i) = 1
		End If
		totalWeight = totalWeight + weights(i)
	Next
	'No countable syllables at all: fall back to equal shares instead of dividing by zero.
	If totalWeight = 0 Then
		For i = 0 To lineCount - 1
			weights(i) = 1
		Next
		totalWeight = lineCount
	End If

	Dim cumulativeWeight As Long = 0
	Dim previousEndTime As Long = startTimeMs
	For i = 0 To lineCount - 1
		Dim targetLine As String = ""
		If i < targetLines.Length Then targetLine = targetLines(i)
		cumulativeWeight = cumulativeWeight + weights(i)
		'Each boundary is derived from the cumulative weight, so truncation errors do
		'not accumulate; the last line is pinned to the original end time.
		Dim lineEndTime As Long
		If i = lineCount - 1 Then
			lineEndTime = endTimeMs
		Else
			lineEndTime = startTimeMs + duration * cumulativeWeight / totalWeight
		End If
		Dim newLine As Map
		newLine.Initialize
		newLine.Put("source",sourceLines(i))
		newLine.Put("target",targetLine)
		newLine.Put("startTime",Utils.GetTimeStringFromMilliseconds(previousEndTime))
		newLine.Put("endTime",Utils.GetTimeStringFromMilliseconds(lineEndTime))
		lines.Add(newLine)
		previousEndTime = lineEndTime
	Next
	Return lines
End Sub
'Returns the length measure used to weight a text when time is distributed by words.
'lang: language code. When it starts with "zh" or "ja" the result is the number of
'      pieces Regex.Split("",text) produces; otherwise it is getPhonemeCount(text).
'text: the text to measure.
Private Sub GetSyllablesLength(lang As String,text As String) As Int
	Dim countByPieces As Boolean = lang.StartsWith("zh") Or lang.StartsWith("ja")
	If countByPieces = False Then Return getPhonemeCount(text)
	Dim pieces() As String = Regex.Split("",text)
	Return pieces.Length
End Sub
'https://www.zhangxinxu.com/wordpress/2024/12/js-word-speach-split-time-calc/
'Estimates the number of syllables in s with a rule-based heuristic: selected
'endings and vowel groups are counted and stripped step by step, then the
'remaining vowels (a, e, i, o, u, y) are counted.
's: the text to analyse. It is lowercased first because every pattern below lists
'   lowercase letters only; without that, capitalised or all-caps text would be
'   undercounted (e.g. "I AM" would yield 0).
'Returns the accumulated count. The "^" and "$" anchors refer to the start and end
'of the whole string s, not to individual words.
Sub getPhonemeCount(s As String) As Int
	Dim totalSyllables As Int = 0
	s = s.ToLowerCase
	' qu -> qw
	s = s.Replace("qu", "qw")
	' replace endings
	s = Regex.Replace("(es$)|(que$)|(gue$)",s, "")
	s = Regex.Replace("^re",s, "ren")
	s = Regex.Replace("^gua",s, "ga")
	s = Regex.Replace("([aeiou])(l+e$)",s, "$1")
	' consonant + "le" ending: counted, then reduced to the consonant
	Dim consonantLe As String = "([bcdfghjklmnpqrstvwxyz])(l+e$)"
	totalSyllables = totalSyllables + CountRegexMatches(consonantLe, s)
	s = Regex.Replace(consonantLe,s, "$1")
	s = Regex.Replace("([aeiou])(ed$)",s, "$1")
	' consonant + "ed" ending: counted, then reduced to the consonant
	Dim consonantEd As String = "([bcdfghjklmnpqrstvwxyz])(ed$)"
	totalSyllables = totalSyllables + CountRegexMatches(consonantEd, s)
	s = Regex.Replace(consonantEd,s, "$1")
	' common suffixes: counted and removed, two passes so that two stacked suffixes are handled
	Dim endsp As String = "(ly$)|(ful$)|(ness$)|(ing$)|(est$)|(er$)|(ent$)|(ence$)"
	For pass = 1 To 2
		totalSyllables = totalSyllables + CountRegexMatches(endsp, s)
		s = Regex.Replace(endsp,s, "")
	Next
	s = Regex.Replace("(^y)([aeiou][aeiou]*)",s, "$2")
	s = Regex.Replace("([aeiou])(y)",s, "$1t")
	' collapse doubled vowels
	s = Regex.Replace("aa+",s, "a")
	s = Regex.Replace("ee+",s, "e")
	s = Regex.Replace("ii+",s, "i")
	s = Regex.Replace("oo+",s, "o")
	s = Regex.Replace("uu+",s, "u")
	' Diphthongs: each one is counted once and then removed
	Dim dipthongs As String = "(eau)|(iou)|(are)|(ai)|(au)|(ea)|(ei)|(eu)|(ie)|(io)|(oa)|(oe)|(oi)|(ou)|(ue)|(ui)"
	totalSyllables = totalSyllables + CountRegexMatches(dipthongs, s)
	s = Regex.Replace(dipthongs,s, "")
	' Remove silent 'e' if length is greater than 3
	If s.Length > 3 Then
		s = Regex.Replace("([bcdfghjklmnpqrstvwxyz])(e$)",s, "$1")
	End If
	' Count the remaining vowels
	totalSyllables = totalSyllables + CountRegexMatches("[aeiouy]", s)
	Return totalSyllables
End Sub

'Returns how many times pattern matches in text.
Private Sub CountRegexMatches(pattern As String,text As String) As Int
	Dim count As Int = 0
	Dim m As Matcher = Regex.Matcher(pattern, text)
	Do While m.Find
		count = count + 1
	Loop
	Return count
End Sub
'Click handler of the split button: waits for segmentation.segmentedTxt to segment
'the text of SourceTextArea, then rewrites SourceTextArea with one trimmed segment
'per line (segments joined with CRLF, result trimmed).
'The path of "segmentationRules.srx" inside File.DirApp is passed to the segmenter
'when that file exists; otherwise an empty string is passed.
'EventData is not used.
Private Sub SplitButton_MouseClicked (EventData As MouseEvent)
	Dim rulesFile As String = "segmentationRules.srx"
	Dim srxPath As String = ""
	If File.Exists(File.DirApp,rulesFile) Then srxPath = File.Combine(File.DirApp,rulesFile)
	Wait For (segmentation.segmentedTxt(SourceTextArea.Text,True,mSourceLang,srxPath,True)) Complete (segments As List)
	Dim joined As StringBuilder
	joined.Initialize
	For Each segment As String In segments
		joined.Append(segment.Trim).Append(CRLF)
	Next
	SourceTextArea.Text = joined.ToString.Trim
End Sub