Skip to content

Commit 67c5362

Browse files
committed
test: split sentence into words
#1994
1 parent 36ad629 commit 67c5362

File tree

2 files changed

+41
-0
lines changed

2 files changed

+41
-0
lines changed
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
package ai.elimu.util.linguistics;
2+
3+
import java.text.BreakIterator;
4+
import java.util.Locale;
5+
6+
/**
 * Utilities for segmenting Thai text, which is normally written without
 * spaces between words.
 */
public class ThaiHelper {

    /**
     * Splits a paragraph into words and joins them with single spaces.
     * <p>
     * Boundaries come from {@link BreakIterator#getWordInstance(Locale)} for
     * the Thai locale. The iterator also reports runs of whitespace as
     * segments of their own; those are skipped, so text that already contains
     * spaces is not returned with doubled or tripled spaces, and the result
     * never starts or ends with a separator. Non-whitespace segments such as
     * punctuation are kept as separate tokens.
     *
     * @param paragraph the text to segment; must not be {@code null}
     * @return the segments of {@code paragraph} separated by single spaces,
     *         or an empty string when there is nothing but whitespace
     * @throws NullPointerException if {@code paragraph} is {@code null}
     */
    public static String splitIntoWords(String paragraph) {
        if (paragraph == null) {
            throw new NullPointerException("paragraph must not be null");
        }

        BreakIterator wordIterator = BreakIterator.getWordInstance(Locale.forLanguageTag("th"));
        wordIterator.setText(paragraph);

        // A builder avoids re-copying the accumulated text on every word.
        StringBuilder words = new StringBuilder(paragraph.length() + 16);
        int start = wordIterator.first();
        int end = wordIterator.next();
        while (end != BreakIterator.DONE) {
            // Whitespace between words is a segment too; it must not become a "word".
            if (!isWhitespaceOnly(paragraph, start, end)) {
                if (words.length() > 0) {
                    words.append(' ');
                }
                words.append(paragraph, start, end);
            }
            start = end;
            end = wordIterator.next();
        }
        return words.toString();
    }

    /**
     * Tells whether every character of {@code text} in {@code [start, end)}
     * is whitespace or a Unicode space separator (e.g. a no-break space).
     */
    private static boolean isWhitespaceOnly(String text, int start, int end) {
        for (int i = start; i < end; i++) {
            char c = text.charAt(i);
            if (!Character.isWhitespace(c) && !Character.isSpaceChar(c)) {
                return false;
            }
        }
        return true;
    }
}
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
package ai.elimu.util.linguistics;
2+
3+
import static org.junit.jupiter.api.Assertions.assertEquals;
4+
5+
import java.io.UnsupportedEncodingException;
6+
7+
import org.junit.jupiter.api.Test;
8+
9+
public class ThaiHelperTest {
10+
11+
@Test
12+
public void testSplitIntoWords() throws UnsupportedEncodingException {
13+
assertEquals("ฉัน จะ ไป โรงเรียน", ThaiHelper.splitIntoWords("ฉันจะไปโรงเรียน"));
14+
assertEquals("เดี๋ยว วัน นี้ เรา กลับ บ้าน ไป พัก", ThaiHelper.splitIntoWords("เดี๋ยววันนี้เรากลับบ้านไปพัก"));
15+
}
16+
}

0 commit comments

Comments
 (0)