Skip to content

Commit 9436eee

Browse files
committed
Remove unused adaptive chunking function
It worked but was not effective, so it was deleted.
1 parent b541789 commit 9436eee

6 files changed

Lines changed: 22 additions & 249 deletions

File tree

dumpling/export/dump.go

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -73,7 +73,6 @@ type Dumper struct {
7373
charsetAndDefaultCollationMap map[string]string
7474

7575
speedRecorder *SpeedRecorder
76-
adaptiveChunkSizer *AdaptiveChunkSizer
7776

7877
chunkedTables sync.Map
7978
}
@@ -100,7 +99,6 @@ func NewDumper(ctx context.Context, conf *Config) (*Dumper, error) {
10099
cancelCtx: cancelFn,
101100
selectTiDBTableRegionFunc: selectTiDBTableRegion,
102101
speedRecorder: NewSpeedRecorder(),
103-
adaptiveChunkSizer: NewAdaptiveChunkSizer(int64(conf.Rows)),
104102
}
105103

106104
var err error
@@ -355,7 +353,7 @@ func (d *Dumper) startWriters(tctx *tcontext.Context, wg *errgroup.Group, taskCh
355353
if err != nil {
356354
return nil, func() {}, err
357355
}
358-
writer := NewWriter(tctx, int64(i), conf, conn, d.extStore, d.metrics, d.adaptiveChunkSizer)
356+
writer := NewWriter(tctx, int64(i), conf, conn, d.extStore, d.metrics)
359357
writer.rebuildConnFn = rebuildConnFn
360358
writer.setFinishTableCallBack(func(task Task) {
361359
// this is called when a file is finished.

dumpling/export/sql.go

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1283,19 +1283,23 @@ func simpleQueryWithArgs(ctx context.Context, conn *sql.Conn, handleOneRow func(
12831283
return errors.Annotatef(rows.Err(), "sql: %s, args: %s", query, args)
12841284
}
12851285

1286-
func pickupPossibleField(tctx *tcontext.Context, meta TableMeta, db *BaseConn) (string, error) {
1286+
func pickupPossibleField(tctx *tcontext.Context, meta TableMeta, db *BaseConn) (string, bool, error) {
12871287
// try using _tidb_rowid first
12881288
if meta.HasImplicitRowID() {
1289-
return "_tidb_rowid", nil
1289+
return "_tidb_rowid", false, nil
12901290
}
12911291
// try to use pk or uk
12921292
fieldName, err := getNumericIndex(tctx, db, meta)
12931293
if err != nil {
1294-
return "", err
1294+
return "", false, err
12951295
}
12961296

1297-
// if fieldName == "", there is no proper index
1298-
return fieldName, nil
1297+
// if fieldName == "", there is no proper index, try string chunking
1298+
if fieldName == "" {
1299+
return pickupPossibleFieldForStringChunking(tctx, meta, db)
1300+
}
1301+
1302+
return fieldName, false, nil
12991303
}
13001304

13011305
func estimateCount(tctx *tcontext.Context, dbName, tableName string, db *BaseConn, field string, conf *Config) uint64 {

dumpling/export/sql_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1790,7 +1790,7 @@ func TestPickupPossibleField(t *testing.T) {
17901790
mock.ExpectQuery(query).WillReturnRows(rows)
17911791
}
17921792

1793-
field, err := pickupPossibleField(tctx, meta, baseConn)
1793+
field, _, err := pickupPossibleField(tctx, meta, baseConn)
17941794
if expectedErr != nil {
17951795
require.ErrorIs(t, err, expectedErr)
17961796
} else {

dumpling/export/string_chunking.go

Lines changed: 4 additions & 97 deletions
Original file line numberDiff line numberDiff line change
@@ -5,93 +5,11 @@ import (
55
"fmt"
66
"strconv"
77
"strings"
8-
"sync"
9-
"time"
108

119
tcontext "github.com/pingcap/tidb/dumpling/context"
1210
"go.uber.org/zap"
1311
)
1412

15-
// AdaptiveChunkSizer calculates optimal chunk sizes based on performance feedback
16-
type AdaptiveChunkSizer struct {
17-
sync.Mutex
18-
currentChunkSize int64
19-
minChunkSize int64
20-
maxChunkSize int64
21-
}
22-
23-
// NewAdaptiveChunkSizer creates a new adaptive chunk sizer
24-
func NewAdaptiveChunkSizer(initialChunkSize int64) *AdaptiveChunkSizer {
25-
min := initialChunkSize / 8
26-
if min == 0 {
27-
min = 1
28-
}
29-
return &AdaptiveChunkSizer{
30-
currentChunkSize: initialChunkSize,
31-
minChunkSize: min,
32-
maxChunkSize: initialChunkSize * 8,
33-
}
34-
}
35-
36-
func (acs *AdaptiveChunkSizer) Get() int64 {
37-
acs.Lock()
38-
defer acs.Unlock()
39-
return acs.currentChunkSize
40-
}
41-
42-
// Adjust adjusts chunk size based on actual performance
43-
func (acs *AdaptiveChunkSizer) Adjust(tctx *tcontext.Context, actualDuration time.Duration) {
44-
acs.Lock()
45-
defer acs.Unlock()
46-
47-
const (
48-
fastThreshold = 100 * time.Millisecond // Increased from 50ms to 100ms
49-
slowThreshold = 5 * time.Second // Increased from 1s to 5s
50-
increaseFactor = 1.5
51-
decreaseFactor = 1.5
52-
)
53-
54-
oldChunkSize := acs.currentChunkSize
55-
newChunkSize := acs.currentChunkSize
56-
57-
if actualDuration < fastThreshold && acs.currentChunkSize < acs.maxChunkSize {
58-
newChunkSize = int64(float64(acs.currentChunkSize) * increaseFactor)
59-
} else if actualDuration > slowThreshold && acs.currentChunkSize > acs.minChunkSize {
60-
newChunkSize = int64(float64(acs.currentChunkSize) / decreaseFactor)
61-
}
62-
63-
// Apply bounds
64-
if newChunkSize < acs.minChunkSize {
65-
newChunkSize = acs.minChunkSize
66-
}
67-
if newChunkSize > acs.maxChunkSize {
68-
newChunkSize = acs.maxChunkSize
69-
}
70-
if newChunkSize == 0 {
71-
newChunkSize = 1
72-
}
73-
74-
// Log chunk size changes
75-
if newChunkSize != oldChunkSize {
76-
var reason string
77-
if actualDuration < fastThreshold {
78-
reason = "fast query detected"
79-
} else if actualDuration > slowThreshold {
80-
reason = "slow query detected"
81-
} else {
82-
reason = "bounds adjustment"
83-
}
84-
85-
tctx.L().Info("adaptive chunk size adjusted",
86-
zap.Int64("oldChunkSize", oldChunkSize),
87-
zap.Int64("newChunkSize", newChunkSize),
88-
zap.Duration("queryDuration", actualDuration),
89-
zap.String("reason", reason))
90-
}
91-
92-
acs.currentChunkSize = newChunkSize
93-
}
94-
9513
// concurrentDumpStringFields handles composite key chunking with multiple columns
9614
func (d *Dumper) concurrentDumpStringFields(tctx *tcontext.Context, conn *BaseConn, meta TableMeta, taskChan chan<- Task, fields []string, orderByClause string, estimatedCount uint64) error {
9715
conf := d.conf
@@ -103,7 +21,7 @@ func (d *Dumper) concurrentDumpStringFields(tctx *tcontext.Context, conn *BaseCo
10321
totalCount = int64(conf.Rows) * 5 // Conservative fallback
10422
}
10523

106-
chunkSize := d.adaptiveChunkSizer.Get()
24+
chunkSize := int64(d.conf.Rows)
10725
if totalCount <= chunkSize {
10826
tctx.L().Info("table too small for chunking, using sequential dump",
10927
zap.String("database", db), zap.String("table", tbl))
@@ -191,23 +109,12 @@ func (d *Dumper) streamStringChunks(tctx *tcontext.Context, conn *BaseConn, meta
191109
break
192110
}
193111

194-
// Get current adaptive chunk size for this boundary
195-
currentChunkSize := d.adaptiveChunkSizer.Get()
196-
if i == 1 || currentChunkSize != chunkSize {
197-
tctx.L().Debug("using adaptive chunk size for boundary sampling",
198-
zap.String("database", db),
199-
zap.String("table", tbl),
200-
zap.Int64("chunkIndex", i),
201-
zap.Int64("originalChunkSize", chunkSize),
202-
zap.Int64("currentChunkSize", currentChunkSize))
203-
}
204-
205112
// Sample boundary for chunk i
206113
var sampleQuery string
207114

208115
if supportsRowNumber {
209116
// Use ROW_NUMBER() for more reliable boundary sampling
210-
rowNumber := i * currentChunkSize
117+
rowNumber := i * chunkSize
211118
sampleQuery = fmt.Sprintf(
212119
"SELECT %s FROM (SELECT %s, ROW_NUMBER() OVER (%s) as rn FROM `%s`.`%s`) t WHERE rn = %d",
213120
selectCols,
@@ -220,7 +127,7 @@ func (d *Dumper) streamStringChunks(tctx *tcontext.Context, conn *BaseConn, meta
220127
// Use cursor-based boundary sampling to avoid expensive OFFSET for large tables
221128
if len(previousBoundary) == 0 {
222129
// First boundary: OFFSET is acceptable for the first boundary
223-
offset := currentChunkSize
130+
offset := chunkSize
224131
sampleQuery = fmt.Sprintf(
225132
"SELECT %s FROM `%s`.`%s` %s LIMIT 1 OFFSET %d",
226133
selectCols,
@@ -240,7 +147,7 @@ func (d *Dumper) streamStringChunks(tctx *tcontext.Context, conn *BaseConn, meta
240147
escapeString(tbl),
241148
fullWhere,
242149
orderByClause,
243-
currentChunkSize) // Skip currentChunkSize more rows from cursor position
150+
chunkSize) // Skip chunkSize more rows from cursor position
244151
}
245152
}
246153

dumpling/export/string_chunking_test.go

Lines changed: 0 additions & 124 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,8 @@
11
package export
22

33
import (
4-
"fmt"
5-
"strings"
64
"testing"
7-
"time"
85

9-
tcontext "github.com/pingcap/tidb/dumpling/context"
106
"github.com/stretchr/testify/require"
117
)
128

@@ -134,126 +130,6 @@ func TestBuildStringWhereClausesCompositeKey(t *testing.T) {
134130
require.Contains(t, clauses[0], "'100'", "Should contain escaped first boundary value")
135131
}
136132

137-
func TestAdaptiveChunkSizer(t *testing.T) {
138-
sizer := NewAdaptiveChunkSizer(1000)
139-
140-
// Test initial values
141-
require.Equal(t, int64(1000), sizer.Get())
142-
143-
// Create a test context with logger
144-
tctx := tcontext.Background()
145-
146-
// Test adjustment behavior
147-
testCases := []struct {
148-
name string
149-
actualDuration time.Duration
150-
expectIncrease bool
151-
expectDecrease bool
152-
}{
153-
{
154-
name: "fast query should increase chunk size",
155-
actualDuration: 30 * time.Millisecond, // faster than 50ms threshold
156-
expectIncrease: true,
157-
},
158-
{
159-
name: "slow query should decrease chunk size",
160-
actualDuration: 6 * time.Second, // slower than 1s threshold
161-
expectDecrease: true,
162-
},
163-
{
164-
name: "normal query should not change much",
165-
actualDuration: 500 * time.Millisecond, // between thresholds
166-
},
167-
}
168-
169-
for _, tc := range testCases {
170-
t.Run(tc.name, func(t *testing.T) {
171-
oldSize := sizer.Get()
172-
sizer.Adjust(tctx, tc.actualDuration)
173-
newSize := sizer.Get()
174-
175-
if tc.expectIncrease {
176-
require.Greater(t, newSize, oldSize, "Expected chunk size to increase")
177-
} else if tc.expectDecrease {
178-
require.Less(t, newSize, oldSize, "Expected chunk size to decrease")
179-
}
180-
})
181-
}
182-
}
183-
184-
func TestDataDrivenBoundaryGeneration(t *testing.T) {
185-
// Test the boundary generation logic (without actual DB queries)
186-
187-
// Test chunk calculation
188-
testCases := []struct {
189-
name string
190-
totalCount int64
191-
chunkSize int64
192-
expectedChunks int64
193-
}{
194-
{
195-
name: "exact division",
196-
totalCount: 1000,
197-
chunkSize: 100,
198-
expectedChunks: 10,
199-
},
200-
{
201-
name: "with remainder",
202-
totalCount: 1050,
203-
chunkSize: 100,
204-
expectedChunks: 11, // ceil(1050/100) = 11
205-
},
206-
{
207-
name: "small table",
208-
totalCount: 50,
209-
chunkSize: 100,
210-
expectedChunks: 1, // no chunking needed
211-
},
212-
}
213-
214-
for _, tc := range testCases {
215-
t.Run(tc.name, func(t *testing.T) {
216-
numChunks := (tc.totalCount + tc.chunkSize - 1) / tc.chunkSize
217-
require.Equal(t, tc.expectedChunks, numChunks, "Chunk calculation should be correct")
218-
219-
// Test boundary count (should be numChunks - 1)
220-
if numChunks > 1 {
221-
expectedBoundaries := numChunks - 1
222-
require.Greater(t, expectedBoundaries, int64(0), "Should need boundaries for multiple chunks")
223-
}
224-
})
225-
}
226-
}
227-
228-
func TestBatchQueryGeneration(t *testing.T) {
229-
// Test the UNION query generation logic
230-
chunkSize := int64(1000)
231-
numChunks := int64(4)
232-
233-
var unionParts []string
234-
for i := int64(1); i < numChunks; i++ {
235-
offset := i * chunkSize
236-
unionParts = append(unionParts, fmt.Sprintf(
237-
"(SELECT `id` FROM `test`.`table` ORDER BY `id` LIMIT 1 OFFSET %d)", offset))
238-
}
239-
240-
expectedParts := []string{
241-
"(SELECT `id` FROM `test`.`table` ORDER BY `id` LIMIT 1 OFFSET 1000)",
242-
"(SELECT `id` FROM `test`.`table` ORDER BY `id` LIMIT 1 OFFSET 2000)",
243-
"(SELECT `id` FROM `test`.`table` ORDER BY `id` LIMIT 1 OFFSET 3000)",
244-
}
245-
246-
require.Equal(t, expectedParts, unionParts, "Should generate correct UNION parts")
247-
require.Len(t, unionParts, 3, "Should generate numChunks-1 parts")
248-
249-
// Test batch query construction
250-
batchQuery := fmt.Sprintf("SELECT * FROM (%s) AS boundaries ORDER BY `id`",
251-
strings.Join(unionParts, " UNION ALL "))
252-
253-
require.Contains(t, batchQuery, "UNION ALL", "Should use UNION ALL for combining queries")
254-
require.Contains(t, batchQuery, "ORDER BY", "Should sort final results")
255-
}
256-
257133
func TestGetStringOrNumericIndexDetection(t *testing.T) {
258134
// This is a unit test for the index detection logic
259135
// Testing the type detection part with mock data

0 commit comments

Comments
 (0)