@@ -42,25 +42,14 @@ type ContentBuffer struct {
4242 currEventCounter int // Event counter for the current chunk.
4343 overlapEventCounter int // Event counter for the current overlap area (delays the next chunk's count).
4444
45- initialCapacity int // The initial capacity of the buffer.
46- shrinkFactor float64 // The threshold factor that triggers buffer shrinkage.
47- resizeFactor float64
45+ initialCapacity int // The initial capacity of the buffer.
4846
4947 counter CharCounter
5048}
5149
5250// BufferOption is a functional option: a function that mutates the ContentBuffer it is given; NewContentBuffer applies each one in order.
5351type BufferOption func (* ContentBuffer )
5452
55- // WithInitialCapacity sets the initial capacity of the buffer.
56- func WithInitialCapacity (capacity int ) BufferOption {
57- return func (c * ContentBuffer ) {
58- if capacity > 0 {
59- c .initialCapacity = capacity
60- }
61- }
62- }
63-
6453func WithMaxChars (maxChars int ) BufferOption {
6554 return func (c * ContentBuffer ) {
6655 c .maxChars = maxChars
@@ -83,15 +72,13 @@ func NewContentBuffer(opts ...BufferOption) *ContentBuffer {
8372 currStart : 0 ,
8473 currChars : 0 ,
8574 overlapCountDelayed : true ,
86- initialCapacity : 2049 ,
87- shrinkFactor : 2 ,
88- resizeFactor : 1.3 ,
8975 }
9076
9177 for _ , opt := range opts {
9278 opt (c )
9379 }
9480
81+ c .initialCapacity = 2 * c .counter .MaxBytesForChars (c .maxChars )
9582 c .buffer = make ([]byte , 0 , c .initialCapacity )
9683 return c
9784}
@@ -115,49 +102,22 @@ func (c *ContentBuffer) startNewChunk(disableOverlap bool) {
115102 c .overlapEventCounter = 0
116103
117104 if c .overlapCharNum > 0 && ! disableOverlap {
118- overlapStart := c .counter .TailStartIndex (c .buffer , c .overlapCharNum ) // Better implementation?
105+ overlapStart := c .counter .TailStartIndex (c .buffer , c .overlapCharNum )
119106 c .currStart = overlapStart
120107 c .currChars = c .overlapCharNum
121108 } else {
122- c .currStart = len ( c . buffer )
109+ c .currStart = end
123110 c .currChars = 0
124111 }
125112}
126113
127- // shrinkIfNeeded checks if the buffer's capacity needs to be reduced and performs the shrink if necessary.
128- func (c * ContentBuffer ) shrinkIfNeeded () {
129- currentCap := cap (c .buffer )
130- currentLen := len (c .buffer )
131-
132- // If the buffer is empty and its capacity is greater than the initial capacity,
133- // shrink it back to the initial capacity.
134- if currentLen == 0 && currentCap > c .initialCapacity {
135- c .buffer = make ([]byte , 0 , c .initialCapacity )
136- return
137- }
138-
139- // Only consider shrinking when the capacity is greater than the initial capacity.
140- if currentCap > c .initialCapacity {
141- targetShrinkCapacity := int (float64 (currentLen ) * c .shrinkFactor )
142- if targetShrinkCapacity < c .initialCapacity {
143- targetShrinkCapacity = c .initialCapacity
144- }
145-
146- if currentCap > targetShrinkCapacity {
147- newBuf := make ([]byte , currentLen , int (float64 (currentLen )* c .resizeFactor ))
148- copy (newBuf , c .buffer )
149- c .buffer = newBuf
150- }
151- }
152- }
153-
154114// Write adds data to the buffer.
155115func (c * ContentBuffer ) Write (data []byte ) {
156116 i := 0
157117 for i < len (data ) {
158- _ , size , err := c .counter .DecodeChar (data [i :])
118+ size , err := c .counter .DecodeOne (data [i :])
159119 if err != nil {
160- // As a fault-tolerance strategy, skip invalid UTF-8 bytes.
120+ // Skip a byte that does not decode as a valid character and retry from the next byte.
161121 i ++
162122 continue
163123 }
@@ -167,9 +127,9 @@ func (c *ContentBuffer) Write(data []byte) {
167127 i += size
168128
169129 if c .currChars == c .maxChars {
170- // Processing is complete and the buffered text has reached the upper limit.
130+ // The current chunk has reached maxChars characters, so close it and start the next chunk.
171131 c .startNewChunk (false )
172- if i = = len (data ) && c .overlapCharNum == 0 {
132+ if i > = len (data ) && c .overlapCharNum == 0 {
173133 c .boundaries [len (c .boundaries )- 1 ].writeTimes ++
174134 return
175135 }
@@ -186,7 +146,6 @@ func (c *ContentBuffer) Write(data []byte) {
186146// Flush commits the chunk currently in progress, if it holds any characters, with overlap disabled for the next chunk.
187147func (c * ContentBuffer ) Flush () {
188148 if c .currChars > 0 {
189- // Avoid missing event counts.
190149 c .currEventCounter += c .overlapEventCounter
191150 c .startNewChunk (true )
192151 }
@@ -212,7 +171,6 @@ func (c *ContentBuffer) GetCompletedResult() SplitResult {
212171 eventCount += boundary .writeTimes
213172 }
214173
215- // Clean completed result
216174 if c .currStart > 0 {
217175 remainingSize := len (c .buffer ) - c .currStart
218176 copy (c .buffer , c .buffer [c .currStart :])
@@ -221,7 +179,6 @@ func (c *ContentBuffer) GetCompletedResult() SplitResult {
221179 c .currStart = 0
222180 c .outputIndex = 0
223181 }
224- c .shrinkIfNeeded ()
225182
226183 return SplitResult {Chunks : chunks , CompletedEvents : eventCount }
227184}
0 commit comments