Skip to content

Commit 1307731

Browse files
committed
improve implementation based on review feedback
1 parent 0e2ffe0 commit 1307731

23 files changed

Lines changed: 2297 additions & 882 deletions

File tree

plugins/go.mod

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,6 @@ require (
3636
github.com/redis/go-redis/v9 v9.5.5
3737
github.com/stretchr/testify v1.10.0
3838
github.com/tidwall/gjson v1.18.0
39-
github.com/tznbdbb/sseparser v0.0.0-20250718142429-b89b446d9a25
4039
golang.org/x/oauth2 v0.21.0
4140
golang.org/x/sync v0.10.0
4241
golang.org/x/time v0.6.0
@@ -76,7 +75,6 @@ require (
7675
github.com/pkg/errors v0.9.1 // indirect
7776
github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10 // indirect
7877
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect
79-
github.com/prataprc/goparsec v0.0.0-20211219142520-daac0e635e7e // indirect
8078
github.com/prometheus/client_golang v1.20.2 // indirect
8179
github.com/prometheus/client_model v0.6.1 // indirect
8280
github.com/prometheus/common v0.55.0 // indirect

plugins/go.sum

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -331,8 +331,6 @@ github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZN
331331
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U=
332332
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
333333
github.com/posener/complete v1.1.1/go.mod h1:em0nMJCgc9GFtwrmVmEMR/ZL6WyhyjMBndrE9hABlRI=
334-
github.com/prataprc/goparsec v0.0.0-20211219142520-daac0e635e7e h1:7teoyCCMBovX+/L3/C2adcGNJI6Tsx6a2hbWQ8vWoO8=
335-
github.com/prataprc/goparsec v0.0.0-20211219142520-daac0e635e7e/go.mod h1:YbpxZqbf10o5u96/iDpcfDQmbIOTX/iNCH/yBByTfaM=
336334
github.com/prometheus/client_golang v0.9.1/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw=
337335
github.com/prometheus/client_golang v0.9.3-0.20190127221311-3c4408c8b829/go.mod h1:p2iRAGwDERtqlqzRXnrOVns+ignqQo//hLXqYxZYVNs=
338336
github.com/prometheus/client_golang v1.0.0/go.mod h1:db9x61etRT2tGnBNRi70OPL5FsnadC4Ky3P0J6CfImo=
@@ -421,8 +419,6 @@ github.com/tklauser/go-sysconf v0.3.6/go.mod h1:MkWzOF4RMCshBAMXuhXJs64Rte09mITn
421419
github.com/tklauser/numcpus v0.2.2 h1:oyhllyrScuYI6g+h/zUvNXNp1wy7x8qQy3t/piefldA=
422420
github.com/tklauser/numcpus v0.2.2/go.mod h1:x3qojaO3uyYt0i56EW/VUYs7uBvdl2fkfZFu0T9wgjM=
423421
github.com/tmc/grpc-websocket-proxy v0.0.0-20170815181823-89b8d40f7ca8/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U=
424-
github.com/tznbdbb/sseparser v0.0.0-20250718142429-b89b446d9a25 h1:rzq+gcbp+yjVKhKBE9RQmB9pIvVRT80ngRcxNwQmBdQ=
425-
github.com/tznbdbb/sseparser v0.0.0-20250718142429-b89b446d9a25/go.mod h1:lzQJaJSI49MI4SuyXabLl0vX8xpmBESDCxNtIuViFVY=
426422
github.com/urfave/cli v1.20.0/go.mod h1:70zkFmudgCuE/ngEzBv17Jvp/497gISqfk5gWijbERA=
427423
github.com/urfave/cli v1.22.1/go.mod h1:Gos4lmkARVdJ6EkW0WaNv/tZAAMe9V7XWyB60NtXRu0=
428424
github.com/xeipuuv/gojsonpointer v0.0.0-20190905194746-02993c407bfb h1:zGWFAtiMcyryUHoUjUJX0/lt1H2+i2Ka2n+D3DImSNo=

plugins/plugins/aicontentsecurity/config.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -50,10 +50,10 @@ type config struct {
5050
}
5151

5252
func (conf *config) Init(cb api.ConfigCallbackHandler) error {
53-
if conf.ModerationTimeout > 0 {
54-
conf.moderationTimeout = time.Duration(conf.ModerationTimeout) * time.Millisecond
53+
if conf.ModerationTimeout != "" {
54+
conf.moderationTimeout, _ = time.ParseDuration(conf.ModerationTimeout)
5555
} else {
56-
conf.moderationTimeout = time.Duration(3 * int(time.Second))
56+
conf.moderationTimeout = 3 * time.Second
5757
}
5858

5959
providerTypeName := reflect.TypeOf(conf.ProviderConfig).String()

plugins/plugins/aicontentsecurity/config_test.go

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@ package aicontentsecurity
1616

1717
import (
1818
"testing"
19-
"time"
2019

2120
"github.com/stretchr/testify/require"
2221

@@ -26,7 +25,7 @@ import (
2625
func TestDefaultValue(t *testing.T) {
2726
cfg := &config{}
2827
cfg.CustomConfig.Config = aicontentsecurity.Config{
29-
ModerationTimeout: int64(2 * time.Second / time.Millisecond), // 2000 ms
28+
ModerationTimeout: "2000ms",
3029
StreamingEnabled: true,
3130
ModerationCharLimit: 5000,
3231
ModerationChunkOverlapLength: 100,

plugins/plugins/aicontentsecurity/contentbuffer/char_counter.go

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,20 +21,21 @@ import (
2121

2222
type CharCounter interface {
2323
Count(data []byte) int
24-
DecodeChar(data []byte) (r rune, size int, err error)
24+
DecodeOne(data []byte) (size int, err error)
2525
TailStartIndex(data []byte, n int) int
26+
MaxBytesForChars(n int) int
2627
}
2728

2829
type Utf8RuneCounter struct{}
2930

3031
func (Utf8RuneCounter) Count(data []byte) int { return utf8.RuneCount(data) }
3132

32-
func (Utf8RuneCounter) DecodeChar(data []byte) (rune, int, error) {
33+
func (Utf8RuneCounter) DecodeOne(data []byte) (int, error) {
3334
r, size := utf8.DecodeRune(data)
3435
if r == utf8.RuneError && size == 1 {
35-
return r, size, fmt.Errorf("invalid utf8 encoding")
36+
return size, fmt.Errorf("invalid utf8 encoding")
3637
}
37-
return r, size, nil
38+
return size, nil
3839
}
3940

4041
func (Utf8RuneCounter) TailStartIndex(data []byte, n int) int {
@@ -50,3 +51,11 @@ func (Utf8RuneCounter) TailStartIndex(data []byte, n int) int {
5051
}
5152
return i
5253
}
54+
55+
func (Utf8RuneCounter) MaxBytesForChars(n int) int {
56+
if n <= 0 {
57+
return 0
58+
}
59+
60+
return n * 4
61+
}

plugins/plugins/aicontentsecurity/contentbuffer/content_buffer.go

Lines changed: 8 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -14,14 +14,12 @@
1414

1515
package contentbuffer
1616

17-
// chunkBoundary stores metadata for a single data chunk.
1817
type chunkBoundary struct {
1918
start int
2019
end int
2120
writeTimes int
2221
}
2322

24-
// SplitResult encapsulates the result of a split operation.
2523
type SplitResult struct {
2624
Chunks []string
2725
CompletedEvents int
@@ -42,25 +40,13 @@ type ContentBuffer struct {
4240
currEventCounter int // Event counter for the current chunk.
4341
overlapEventCounter int // Event counter for the current overlap area (delays the next chunk's count).
4442

45-
initialCapacity int // The initial capacity of the buffer.
46-
shrinkFactor float64 // The threshold factor that triggers buffer shrinkage.
47-
resizeFactor float64
43+
initialCapacity int // The initial capacity of the buffer.
4844

4945
counter CharCounter
5046
}
5147

52-
// BufferOption is a function type for configuring a ContentBuffer.
5348
type BufferOption func(*ContentBuffer)
5449

55-
// WithInitialCapacity sets the initial capacity of the buffer.
56-
func WithInitialCapacity(capacity int) BufferOption {
57-
return func(c *ContentBuffer) {
58-
if capacity > 0 {
59-
c.initialCapacity = capacity
60-
}
61-
}
62-
}
63-
6450
func WithMaxChars(maxChars int) BufferOption {
6551
return func(c *ContentBuffer) {
6652
c.maxChars = maxChars
@@ -73,7 +59,6 @@ func WithOverlapCharNum(overlapCharNum int) BufferOption {
7359
}
7460
}
7561

76-
// NewContentBuffer creates and initializes a new ContentBuffer.
7762
func NewContentBuffer(opts ...BufferOption) *ContentBuffer {
7863
c := &ContentBuffer{
7964
maxChars: 100,
@@ -83,15 +68,13 @@ func NewContentBuffer(opts ...BufferOption) *ContentBuffer {
8368
currStart: 0,
8469
currChars: 0,
8570
overlapCountDelayed: true,
86-
initialCapacity: 2049,
87-
shrinkFactor: 2,
88-
resizeFactor: 1.3,
8971
}
9072

9173
for _, opt := range opts {
9274
opt(c)
9375
}
9476

77+
c.initialCapacity = 2 * c.counter.MaxBytesForChars(c.maxChars)
9578
c.buffer = make([]byte, 0, c.initialCapacity)
9679
return c
9780
}
@@ -115,49 +98,22 @@ func (c *ContentBuffer) startNewChunk(disableOverlap bool) {
11598
c.overlapEventCounter = 0
11699

117100
if c.overlapCharNum > 0 && !disableOverlap {
118-
overlapStart := c.counter.TailStartIndex(c.buffer, c.overlapCharNum) // Better implementation?
101+
overlapStart := c.counter.TailStartIndex(c.buffer, c.overlapCharNum)
119102
c.currStart = overlapStart
120103
c.currChars = c.overlapCharNum
121104
} else {
122-
c.currStart = len(c.buffer)
105+
c.currStart = end
123106
c.currChars = 0
124107
}
125108
}
126109

127-
// shrinkIfNeeded checks if the buffer's capacity needs to be reduced and performs the shrink if necessary.
128-
func (c *ContentBuffer) shrinkIfNeeded() {
129-
currentCap := cap(c.buffer)
130-
currentLen := len(c.buffer)
131-
132-
// If the buffer is empty and its capacity is greater than the initial capacity,
133-
// shrink it back to the initial capacity.
134-
if currentLen == 0 && currentCap > c.initialCapacity {
135-
c.buffer = make([]byte, 0, c.initialCapacity)
136-
return
137-
}
138-
139-
// Only consider shrinking when the capacity is greater than the initial capacity.
140-
if currentCap > c.initialCapacity {
141-
targetShrinkCapacity := int(float64(currentLen) * c.shrinkFactor)
142-
if targetShrinkCapacity < c.initialCapacity {
143-
targetShrinkCapacity = c.initialCapacity
144-
}
145-
146-
if currentCap > targetShrinkCapacity {
147-
newBuf := make([]byte, currentLen, int(float64(currentLen)*c.resizeFactor))
148-
copy(newBuf, c.buffer)
149-
c.buffer = newBuf
150-
}
151-
}
152-
}
153-
154110
// Write adds data to the buffer.
155111
func (c *ContentBuffer) Write(data []byte) {
156112
i := 0
157113
for i < len(data) {
158-
_, size, err := c.counter.DecodeChar(data[i:])
114+
size, err := c.counter.DecodeOne(data[i:])
159115
if err != nil {
160-
// As a fault-tolerance strategy, skip invalid UTF-8 bytes.
116+
// skip invalid bytes.
161117
i++
162118
continue
163119
}
@@ -167,9 +123,9 @@ func (c *ContentBuffer) Write(data []byte) {
167123
i += size
168124

169125
if c.currChars == c.maxChars {
170-
// Processing is complete and the buffered text has reached the upper limit.
126+
// Processing is complete, and the buffered text has reached the upper limit.
171127
c.startNewChunk(false)
172-
if i == len(data) && c.overlapCharNum == 0 {
128+
if i >= len(data) && c.overlapCharNum == 0 {
173129
c.boundaries[len(c.boundaries)-1].writeTimes++
174130
return
175131
}
@@ -186,7 +142,6 @@ func (c *ContentBuffer) Write(data []byte) {
186142
// Flush commits the currently ongoing chunk.
187143
func (c *ContentBuffer) Flush() {
188144
if c.currChars > 0 {
189-
// Avoid missing event counts.
190145
c.currEventCounter += c.overlapEventCounter
191146
c.startNewChunk(true)
192147
}
@@ -212,7 +167,6 @@ func (c *ContentBuffer) GetCompletedResult() SplitResult {
212167
eventCount += boundary.writeTimes
213168
}
214169

215-
// Clean completed result
216170
if c.currStart > 0 {
217171
remainingSize := len(c.buffer) - c.currStart
218172
copy(c.buffer, c.buffer[c.currStart:])
@@ -221,7 +175,6 @@ func (c *ContentBuffer) GetCompletedResult() SplitResult {
221175
c.currStart = 0
222176
c.outputIndex = 0
223177
}
224-
c.shrinkIfNeeded()
225178

226179
return SplitResult{Chunks: chunks, CompletedEvents: eventCount}
227180
}

0 commit comments

Comments
 (0)