File tree Expand file tree Collapse file tree 2 files changed +41
-0
lines changed
main/java/ai/elimu/util/linguistics
test/java/ai/elimu/util/linguistics Expand file tree Collapse file tree 2 files changed +41
-0
lines changed Original file line number Diff line number Diff line change 1+ package ai .elimu .util .linguistics ;
2+
3+ import java .text .BreakIterator ;
4+ import java .util .Locale ;
5+
/**
 * Helper for Thai text, where words inside a sentence are typically written
 * without any separating spaces.
 */
public class ThaiHelper {

    /**
     * Splits a paragraph into words and joins them with exactly one space.
     * <p>
     * Word boundaries come from {@link BreakIterator#getWordInstance(Locale)} for the
     * Thai locale, so segmentation quality depends on the break rules shipped with
     * the running JDK.
     * <p>
     * The word iterator also reports runs of whitespace as segments of their own.
     * Those are skipped, so input that already contains spaces or line breaks does
     * not end up with doubled or tripled separators. Punctuation segments are kept
     * and emitted as separate tokens.
     *
     * @param paragraph the text to segment; may be {@code null} or empty
     * @return the segments separated by single spaces, or an empty string when
     *         {@code paragraph} is {@code null}, empty, or contains only whitespace
     */
    public static String splitIntoWords(String paragraph) {
        if (paragraph == null || paragraph.isEmpty()) {
            return "";
        }

        // Locale.forLanguageTag replaces the deprecated Locale(String) constructor.
        BreakIterator wordIterator = BreakIterator.getWordInstance(Locale.forLanguageTag("th"));
        wordIterator.setText(paragraph);

        StringBuilder words = new StringBuilder(paragraph.length() + 16);
        int start = wordIterator.first();
        int end = wordIterator.next();
        while (end != BreakIterator.DONE) {
            String segment = paragraph.substring(start, end);
            // Whitespace between words is a segment too; emitting it would
            // produce several consecutive spaces in the result.
            if (!isBlank(segment)) {
                if (words.length() > 0) {
                    words.append(' ');
                }
                words.append(segment);
            }
            start = end;
            end = wordIterator.next();
        }
        return words.toString();
    }

    /**
     * Tells whether a segment consists solely of separator characters.
     * Besides regular and non-breaking whitespace, the zero-width space (U+200B)
     * is counted as blank because it acts as an invisible word separator.
     */
    private static boolean isBlank(String segment) {
        for (int i = 0; i < segment.length(); i++) {
            char c = segment.charAt(i);
            boolean separator = Character.isWhitespace(c)
                    || Character.isSpaceChar(c)
                    || c == '\u200B';
            if (!separator) {
                return false;
            }
        }
        return true;
    }
}
Original file line number Diff line number Diff line change 1+ package ai .elimu .util .linguistics ;
2+
3+ import static org .junit .jupiter .api .Assertions .assertEquals ;
4+
5+ import java .io .UnsupportedEncodingException ;
6+
7+ import org .junit .jupiter .api .Test ;
8+
9+ public class ThaiHelperTest {
10+
11+ @ Test
12+ public void testSplitIntoWords () throws UnsupportedEncodingException {
13+ assertEquals ("ฉัน จะ ไป โรงเรียน" , ThaiHelper .splitIntoWords ("ฉันจะไปโรงเรียน" ));
14+ assertEquals ("เดี๋ยว วัน นี้ เรา กลับ บ้าน ไป พัก" , ThaiHelper .splitIntoWords ("เดี๋ยววันนี้เรากลับบ้านไปพัก" ));
15+ }
16+ }
You can’t perform that action at this time.
0 commit comments