File tree Expand file tree Collapse file tree 4 files changed +24
-0
lines changed
Expand file tree Collapse file tree 4 files changed +24
-0
lines changed Original file line number Diff line number Diff line change 1+ 0.2.2 2023-09-06
2+ - Fix behaviour for end of text character positions
3+ when no end of sentence occurred before.
4+
150.2.1 2023-09-05
26 - Add English tokenizer.
37 - Fix buffer bug.
Original file line number Diff line number Diff line change @@ -1018,6 +1018,10 @@ PARSECHAR:
10181018
10191019 if eot {
10201020 eot = false
1021+ if !sentenceEnd {
1022+ sentenceEnd = true
1023+ w.SentenceEnd(buffc)
1024+ }
10211025 textEnd = true
10221026 w.TextEnd(0)
10231027 if DEBUG {
Original file line number Diff line number Diff line change @@ -592,6 +592,10 @@ PARSECHARM:
592592
593593 if eot {
594594 eot = false
595+ if !sentenceEnd {
596+ sentenceEnd = true
597+ w.SentenceEnd(buffc)
598+ }
595599 textEnd = true
596600 w.TextEnd(buffc)
597601 rewindBuffer = true
Original file line number Diff line number Diff line change @@ -85,6 +85,18 @@ func TestTokenWriterFromOptions(t *testing.T) {
8585 matStr = w.String()
8686 assert.Equal("1 5 5 6\n1 6\n0 3 3 4\n0 4\n", matStr)
8787
88+ w.Reset()
89+ mat.TransduceTokenWriter(strings.NewReader("Tree\n\x04\n"), tws)
90+
91+ matStr = w.String()
92+ assert.Equal("0 4\n0 4\n", matStr)
93+
94+ w.Reset()
95+ mat.TransduceTokenWriter(strings.NewReader("Tree.\n\x04\n"), tws)
96+
97+ matStr = w.String()
98+ assert.Equal("0 4 4 5\n0 5\n", matStr)
99+
88100 //
89101 // Write sentence offsets without token offsets
90102 tws = NewTokenWriter(w, SENTENCE_POS|NEWLINE_AFTER_EOT)
You can’t perform that action at this time.
0 commit comments