remove unused adaptive chunking function

takaidohigasi · takaidohigasi · commit 9436eee663a9 · 2025-07-03T15:15:00.000+09:00
The adaptive chunk sizer worked but was not effective, so it has been deleted.
diff --git a/dumpling/export/dump.go b/dumpling/export/dump.go
@@ -73,7 +73,6 @@ type Dumper struct {
 	charsetAndDefaultCollationMap map[string]string
 
 	speedRecorder      *SpeedRecorder
-	adaptiveChunkSizer *AdaptiveChunkSizer
 
 	chunkedTables sync.Map
 }
@@ -100,7 +99,6 @@ func NewDumper(ctx context.Context, conf *Config) (*Dumper, error) {
 		cancelCtx:                 cancelFn,
 		selectTiDBTableRegionFunc: selectTiDBTableRegion,
 		speedRecorder:             NewSpeedRecorder(),
-		adaptiveChunkSizer:        NewAdaptiveChunkSizer(int64(conf.Rows)),
 	}
 
 	var err error
@@ -355,7 +353,7 @@ func (d *Dumper) startWriters(tctx *tcontext.Context, wg *errgroup.Group, taskCh
 		if err != nil {
 			return nil, func() {}, err
 		}
-		writer := NewWriter(tctx, int64(i), conf, conn, d.extStore, d.metrics, d.adaptiveChunkSizer)
+		writer := NewWriter(tctx, int64(i), conf, conn, d.extStore, d.metrics)
 		writer.rebuildConnFn = rebuildConnFn
 		writer.setFinishTableCallBack(func(task Task) {
 			// this is called when a file is finished.
diff --git a/dumpling/export/sql.go b/dumpling/export/sql.go
@@ -1283,19 +1283,23 @@ func simpleQueryWithArgs(ctx context.Context, conn *sql.Conn, handleOneRow func(
 	return errors.Annotatef(rows.Err(), "sql: %s, args: %s", query, args)
 }
 
-func pickupPossibleField(tctx *tcontext.Context, meta TableMeta, db *BaseConn) (string, error) {
+func pickupPossibleField(tctx *tcontext.Context, meta TableMeta, db *BaseConn) (string, bool, error) {
 	// try using _tidb_rowid first
 	if meta.HasImplicitRowID() {
-		return "_tidb_rowid", nil
+		return "_tidb_rowid", false, nil
 	}
 	// try to use pk or uk
 	fieldName, err := getNumericIndex(tctx, db, meta)
 	if err != nil {
-		return "", err
+		return "", false, err
 	}
 
-	// if fieldName == "", there is no proper index
-	return fieldName, nil
+	// if fieldName == "", there is no proper index, try string chunking
+	if fieldName == "" {
+		return pickupPossibleFieldForStringChunking(tctx, meta, db)
+	}
+	
+	return fieldName, false, nil
 }
 
 func estimateCount(tctx *tcontext.Context, dbName, tableName string, db *BaseConn, field string, conf *Config) uint64 {
diff --git a/dumpling/export/sql_test.go b/dumpling/export/sql_test.go
@@ -1790,7 +1790,7 @@ func TestPickupPossibleField(t *testing.T) {
 			mock.ExpectQuery(query).WillReturnRows(rows)
 		}
 
-		field, err := pickupPossibleField(tctx, meta, baseConn)
+		field, _, err := pickupPossibleField(tctx, meta, baseConn)
 		if expectedErr != nil {
 			require.ErrorIs(t, err, expectedErr)
 		} else {
diff --git a/dumpling/export/string_chunking.go b/dumpling/export/string_chunking.go
@@ -5,93 +5,11 @@ import (
 	"fmt"
 	"strconv"
 	"strings"
-	"sync"
-	"time"
 
 	tcontext "github.com/pingcap/tidb/dumpling/context"
 	"go.uber.org/zap"
 )
 
-// AdaptiveChunkSizer calculates optimal chunk sizes based on performance feedback
-type AdaptiveChunkSizer struct {
-	sync.Mutex
-	currentChunkSize int64
-	minChunkSize     int64
-	maxChunkSize     int64
-}
-
-// NewAdaptiveChunkSizer creates a new adaptive chunk sizer
-func NewAdaptiveChunkSizer(initialChunkSize int64) *AdaptiveChunkSizer {
-	min := initialChunkSize / 8
-	if min == 0 {
-		min = 1
-	}
-	return &AdaptiveChunkSizer{
-		currentChunkSize: initialChunkSize,
-		minChunkSize:     min,
-		maxChunkSize:     initialChunkSize * 8,
-	}
-}
-
-func (acs *AdaptiveChunkSizer) Get() int64 {
-	acs.Lock()
-	defer acs.Unlock()
-	return acs.currentChunkSize
-}
-
-// Adjust adjusts chunk size based on actual performance
-func (acs *AdaptiveChunkSizer) Adjust(tctx *tcontext.Context, actualDuration time.Duration) {
-	acs.Lock()
-	defer acs.Unlock()
-
-	const (
-		fastThreshold  = 100 * time.Millisecond // Increased from 50ms to 100ms
-		slowThreshold  = 5 * time.Second        // Increased from 1s to 5s
-		increaseFactor = 1.5
-		decreaseFactor = 1.5
-	)
-
-	oldChunkSize := acs.currentChunkSize
-	newChunkSize := acs.currentChunkSize
-
-	if actualDuration < fastThreshold && acs.currentChunkSize < acs.maxChunkSize {
-		newChunkSize = int64(float64(acs.currentChunkSize) * increaseFactor)
-	} else if actualDuration > slowThreshold && acs.currentChunkSize > acs.minChunkSize {
-		newChunkSize = int64(float64(acs.currentChunkSize) / decreaseFactor)
-	}
-
-	// Apply bounds
-	if newChunkSize < acs.minChunkSize {
-		newChunkSize = acs.minChunkSize
-	}
-	if newChunkSize > acs.maxChunkSize {
-		newChunkSize = acs.maxChunkSize
-	}
-	if newChunkSize == 0 {
-		newChunkSize = 1
-	}
-
-	// Log chunk size changes
-	if newChunkSize != oldChunkSize {
-		var reason string
-		if actualDuration < fastThreshold {
-			reason = "fast query detected"
-		} else if actualDuration > slowThreshold {
-			reason = "slow query detected"
-		} else {
-			reason = "bounds adjustment"
-		}
-
-		tctx.L().Info("adaptive chunk size adjusted",
-			zap.Int64("oldChunkSize", oldChunkSize),
-			zap.Int64("newChunkSize", newChunkSize),
-			zap.Duration("queryDuration", actualDuration),
-			zap.String("reason", reason))
-	}
-
-	acs.currentChunkSize = newChunkSize
-}
-
 // concurrentDumpStringFields handles composite key chunking with multiple columns
 func (d *Dumper) concurrentDumpStringFields(tctx *tcontext.Context, conn *BaseConn, meta TableMeta, taskChan chan<- Task, fields []string, orderByClause string, estimatedCount uint64) error {
 	conf := d.conf
@@ -103,7 +21,7 @@ func (d *Dumper) concurrentDumpStringFields(tctx *tcontext.Context, conn *BaseCo
 		totalCount = int64(conf.Rows) * 5 // Conservative fallback
 	}
 
-	chunkSize := d.adaptiveChunkSizer.Get()
+	chunkSize := int64(d.conf.Rows)
 	if totalCount <= chunkSize {
 		tctx.L().Info("table too small for chunking, using sequential dump",
 			zap.String("database", db), zap.String("table", tbl))
@@ -191,23 +109,12 @@ func (d *Dumper) streamStringChunks(tctx *tcontext.Context, conn *BaseConn, meta
 			break
 		}
 
-		// Get current adaptive chunk size for this boundary
-		currentChunkSize := d.adaptiveChunkSizer.Get()
-		if i == 1 || currentChunkSize != chunkSize {
-			tctx.L().Debug("using adaptive chunk size for boundary sampling",
-				zap.String("database", db),
-				zap.String("table", tbl),
-				zap.Int64("chunkIndex", i),
-				zap.Int64("originalChunkSize", chunkSize),
-				zap.Int64("currentChunkSize", currentChunkSize))
-		}
-
 		// Sample boundary for chunk i
 		var sampleQuery string
 
 		if supportsRowNumber {
 			// Use ROW_NUMBER() for more reliable boundary sampling
-			rowNumber := i * currentChunkSize
+			rowNumber := i * chunkSize
 			sampleQuery = fmt.Sprintf(
 				"SELECT %s FROM (SELECT %s, ROW_NUMBER() OVER (%s) as rn FROM `%s`.`%s`) t WHERE rn = %d",
 				selectCols,
@@ -220,7 +127,7 @@ func (d *Dumper) streamStringChunks(tctx *tcontext.Context, conn *BaseConn, meta
 			// Use cursor-based boundary sampling to avoid expensive OFFSET for large tables
 			if len(previousBoundary) == 0 {
 				// First boundary: OFFSET is acceptable for the first boundary
-				offset := currentChunkSize
+				offset := chunkSize
 				sampleQuery = fmt.Sprintf(
 					"SELECT %s FROM `%s`.`%s` %s LIMIT 1 OFFSET %d",
 					selectCols,
@@ -240,7 +147,7 @@ func (d *Dumper) streamStringChunks(tctx *tcontext.Context, conn *BaseConn, meta
 					escapeString(tbl),
 					fullWhere,
 					orderByClause,
-					currentChunkSize) // Skip currentChunkSize more rows from cursor position
+					chunkSize) // Skip chunkSize more rows from cursor position
 			}
 		}
 
diff --git a/dumpling/export/string_chunking_test.go b/dumpling/export/string_chunking_test.go
@@ -1,12 +1,8 @@
 package export
 
 import (
-	"fmt"
-	"strings"
 	"testing"
-	"time"
 
-	tcontext "github.com/pingcap/tidb/dumpling/context"
 	"github.com/stretchr/testify/require"
 )
 
@@ -134,126 +130,6 @@ func TestBuildStringWhereClausesCompositeKey(t *testing.T) {
 	require.Contains(t, clauses[0], "'100'", "Should contain escaped first boundary value")
 }
 
-func TestAdaptiveChunkSizer(t *testing.T) {
-	sizer := NewAdaptiveChunkSizer(1000)
-
-	// Test initial values
-	require.Equal(t, int64(1000), sizer.Get())
-
-	// Create a test context with logger
-	tctx := tcontext.Background()
-
-	// Test adjustment behavior
-	testCases := []struct {
-		name           string
-		actualDuration time.Duration
-		expectIncrease bool
-		expectDecrease bool
-	}{
-		{
-			name:           "fast query should increase chunk size",
-			actualDuration: 30 * time.Millisecond, // faster than 50ms threshold
-			expectIncrease: true,
-		},
-		{
-			name:           "slow query should decrease chunk size",
-			actualDuration: 6 * time.Second, // slower than 1s threshold
-			expectDecrease: true,
-		},
-		{
-			name:           "normal query should not change much",
-			actualDuration: 500 * time.Millisecond, // between thresholds
-		},
-	}
-
-	for _, tc := range testCases {
-		t.Run(tc.name, func(t *testing.T) {
-			oldSize := sizer.Get()
-			sizer.Adjust(tctx, tc.actualDuration)
-			newSize := sizer.Get()
-
-			if tc.expectIncrease {
-				require.Greater(t, newSize, oldSize, "Expected chunk size to increase")
-			} else if tc.expectDecrease {
-				require.Less(t, newSize, oldSize, "Expected chunk size to decrease")
-			}
-		})
-	}
-}
-
-func TestDataDrivenBoundaryGeneration(t *testing.T) {
-	// Test the boundary generation logic (without actual DB queries)
-
-	// Test chunk calculation
-	testCases := []struct {
-		name           string
-		totalCount     int64
-		chunkSize      int64
-		expectedChunks int64
-	}{
-		{
-			name:           "exact division",
-			totalCount:     1000,
-			chunkSize:      100,
-			expectedChunks: 10,
-		},
-		{
-			name:           "with remainder",
-			totalCount:     1050,
-			chunkSize:      100,
-			expectedChunks: 11, // ceil(1050/100) = 11
-		},
-		{
-			name:           "small table",
-			totalCount:     50,
-			chunkSize:      100,
-			expectedChunks: 1, // no chunking needed
-		},
-	}
-
-	for _, tc := range testCases {
-		t.Run(tc.name, func(t *testing.T) {
-			numChunks := (tc.totalCount + tc.chunkSize - 1) / tc.chunkSize
-			require.Equal(t, tc.expectedChunks, numChunks, "Chunk calculation should be correct")
-
-			// Test boundary count (should be numChunks - 1)
-			if numChunks > 1 {
-				expectedBoundaries := numChunks - 1
-				require.Greater(t, expectedBoundaries, int64(0), "Should need boundaries for multiple chunks")
-			}
-		})
-	}
-}
-
-func TestBatchQueryGeneration(t *testing.T) {
-	// Test the UNION query generation logic
-	chunkSize := int64(1000)
-	numChunks := int64(4)
-
-	var unionParts []string
-	for i := int64(1); i < numChunks; i++ {
-		offset := i * chunkSize
-		unionParts = append(unionParts, fmt.Sprintf(
-			"(SELECT `id` FROM `test`.`table` ORDER BY `id` LIMIT 1 OFFSET %d)", offset))
-	}
-
-	expectedParts := []string{
-		"(SELECT `id` FROM `test`.`table` ORDER BY `id` LIMIT 1 OFFSET 1000)",
-		"(SELECT `id` FROM `test`.`table` ORDER BY `id` LIMIT 1 OFFSET 2000)",
-		"(SELECT `id` FROM `test`.`table` ORDER BY `id` LIMIT 1 OFFSET 3000)",
-	}
-
-	require.Equal(t, expectedParts, unionParts, "Should generate correct UNION parts")
-	require.Len(t, unionParts, 3, "Should generate numChunks-1 parts")
-
-	// Test batch query construction
-	batchQuery := fmt.Sprintf("SELECT * FROM (%s) AS boundaries ORDER BY `id`",
-		strings.Join(unionParts, " UNION ALL "))
-
-	require.Contains(t, batchQuery, "UNION ALL", "Should use UNION ALL for combining queries")
-	require.Contains(t, batchQuery, "ORDER BY", "Should sort final results")
-}
-
 func TestGetStringOrNumericIndexDetection(t *testing.T) {
 	// This is a unit test for the index detection logic
 	// Testing the type detection part with mock data
diff --git a/dumpling/export/writer.go b/dumpling/export/writer.go

Original file line number	Diff line number	Diff line change
`@@ -1790,7 +1790,7 @@ func TestPickupPossibleField(t *testing.T) {`
`1790`	`1790`	`mock.ExpectQuery(query).WillReturnRows(rows)`
`1791`	`1791`	`}`
`1792`	`1792`
`1793`		`- field, err := pickupPossibleField(tctx, meta, baseConn)`
	`1793`	`+ field, _, err := pickupPossibleField(tctx, meta, baseConn)`
`1794`	`1794`	`if expectedErr != nil {`
`1795`	`1795`	`require.ErrorIs(t, err, expectedErr)`
`1796`	`1796`	`} else {`