@@ -17,6 +17,7 @@ package importer
17
17
import (
18
18
"context"
19
19
"database/sql"
20
+ "path/filepath"
20
21
"strings"
21
22
"sync"
22
23
@@ -39,6 +40,9 @@ type CloudImportSDK interface {
39
40
// GetTableMetaByName returns metadata for a specific table
40
41
GetTableMetaByName (ctx context.Context , schema , table string ) (* TableMeta , error )
41
42
43
+ // GetTotalSize returns the cumulative size (in bytes) of all data files under the source path
44
+ GetTotalSize (ctx context.Context ) (int64 , error )
45
+
42
46
// Close releases resources used by the SDK
43
47
Close () error
44
48
}
@@ -89,10 +93,11 @@ func NewImportSDK(ctx context.Context, sourcePath string, db *sql.DB, options ..
89
93
}
90
94
91
95
ldrCfg := mydump.LoaderConfig {
92
- SourceURL : sourcePath ,
93
- FileRouters : cfg .fileRouteRules ,
94
- // Use default rules only if no custom rules are provided
96
+ SourceURL : sourcePath ,
97
+ Filter : cfg .filter ,
98
+ FileRouters : cfg . fileRouteRules ,
95
99
DefaultFileRules : len (cfg .fileRouteRules ) == 0 ,
100
+ CharacterSet : cfg .charset ,
96
101
}
97
102
98
103
loader , err := mydump .NewLoaderWithStore (ctx , ldrCfg , store )
@@ -115,18 +120,22 @@ type SDKOption func(*sdkConfig)
115
120
116
121
type sdkConfig struct {
117
122
// Loader options
118
- concurrency int
119
- sqlMode mysql.SQLMode
120
- fileRouteRules []* config.FileRouteRule
123
+ concurrency int
124
+ sqlMode mysql.SQLMode
125
+ fileRouteRules []* config.FileRouteRule
126
+ filter []string
127
+ charset string
121
128
122
129
// General options
123
130
logger log.Logger
124
131
}
125
132
126
133
func defaultSDKConfig () * sdkConfig {
127
134
return & sdkConfig {
128
- concurrency : 4 ,
129
- logger : log .L (),
135
+ concurrency : 4 ,
136
+ filter : config .GetDefaultFilter (),
137
+ logger : log .L (),
138
+ charset : "auto" ,
130
139
}
131
140
}
132
141
@@ -153,18 +162,34 @@ func WithSQLMode(mode mysql.SQLMode) SDKOption {
153
162
}
154
163
}
155
164
165
+ // WithFilter specifies a filter for the loader
166
+ func WithFilter (filter []string ) SDKOption {
167
+ return func (cfg * sdkConfig ) {
168
+ cfg .filter = filter
169
+ }
170
+ }
171
+
156
172
// WithFileRouters specifies custom file routing rules
157
173
func WithFileRouters (routers []* config.FileRouteRule ) SDKOption {
158
174
return func (cfg * sdkConfig ) {
159
175
cfg .fileRouteRules = routers
160
176
}
161
177
}
162
178
179
+ // WithCharset specifies the character set for import (default "auto").
180
+ func WithCharset (cs string ) SDKOption {
181
+ return func (cfg * sdkConfig ) {
182
+ if cs != "" {
183
+ cfg .charset = cs
184
+ }
185
+ }
186
+ }
187
+
163
188
// CreateSchemasAndTables implements the CloudImportSDK interface
164
189
func (sdk * ImportSDK ) CreateSchemasAndTables (ctx context.Context ) error {
165
190
dbMetas := sdk .loader .GetDatabases ()
166
191
if len (dbMetas ) == 0 {
167
- return errors .New ("no database schemas found in source path" )
192
+ return errors .New ("no databases found in the source path" )
168
193
}
169
194
170
195
// Create all schemas and tables
@@ -196,12 +221,15 @@ func (sdk *ImportSDK) GetTablesMeta(ctx context.Context) ([]*TableMeta, error) {
196
221
for _ , file := range tblMeta .DataFiles {
197
222
allDataFiles [file .FileMeta .Path ] = file
198
223
}
224
+ if tblMeta .SchemaFile .FileMeta .Path != "" {
225
+ allDataFiles [tblMeta .SchemaFile .FileMeta .Path ] = tblMeta .SchemaFile
226
+ }
199
227
}
200
228
}
201
229
202
230
for _ , dbMeta := range dbMetas {
203
231
for _ , tblMeta := range dbMeta .Tables {
204
- tableMeta , err := sdk .buildTableMeta (ctx , dbMeta , tblMeta , allDataFiles )
232
+ tableMeta , err := sdk .buildTableMeta (dbMeta , tblMeta , allDataFiles )
205
233
if err != nil {
206
234
return nil , errors .Wrapf (err , "failed to build metadata for table %s.%s" ,
207
235
dbMeta .Name , tblMeta .Name )
@@ -217,13 +245,16 @@ func (sdk *ImportSDK) GetTablesMeta(ctx context.Context) ([]*TableMeta, error) {
217
245
func (sdk * ImportSDK ) GetTableMetaByName (ctx context.Context , schema , table string ) (* TableMeta , error ) {
218
246
dbMetas := sdk .loader .GetDatabases ()
219
247
220
- // Collect all data files for pattern matching
248
+ // Collect all data files (and schema files) for pattern matching
221
249
allDataFiles := make (map [string ]mydump.FileInfo )
222
250
for _ , dbMeta := range dbMetas {
223
251
for _ , tblMeta := range dbMeta .Tables {
224
252
for _ , file := range tblMeta .DataFiles {
225
253
allDataFiles [file .FileMeta .Path ] = file
226
254
}
255
+ if tblMeta .SchemaFile .FileMeta .Path != "" {
256
+ allDataFiles [tblMeta .SchemaFile .FileMeta .Path ] = tblMeta .SchemaFile
257
+ }
227
258
}
228
259
}
229
260
@@ -238,7 +269,7 @@ func (sdk *ImportSDK) GetTableMetaByName(ctx context.Context, schema, table stri
238
269
continue
239
270
}
240
271
241
- return sdk .buildTableMeta (ctx , dbMeta , tblMeta , allDataFiles )
272
+ return sdk .buildTableMeta (dbMeta , tblMeta , allDataFiles )
242
273
}
243
274
244
275
return nil , errors .Errorf ("table '%s' not found in schema '%s'" , table , schema )
@@ -247,9 +278,21 @@ func (sdk *ImportSDK) GetTableMetaByName(ctx context.Context, schema, table stri
247
278
return nil , errors .Errorf ("schema '%s' not found" , schema )
248
279
}
249
280
281
+ // GetTotalSize implements CloudImportSDK interface
282
+ func (sdk * ImportSDK ) GetTotalSize (ctx context.Context ) (int64 , error ) {
283
+ tables , err := sdk .GetTablesMeta (ctx )
284
+ if err != nil {
285
+ return 0 , err
286
+ }
287
+ var total int64
288
+ for _ , tbl := range tables {
289
+ total += tbl .TotalSize
290
+ }
291
+ return total , nil
292
+ }
293
+
250
294
// buildTableMeta creates a TableMeta from database and table metadata
251
295
func (sdk * ImportSDK ) buildTableMeta (
252
- ctx context.Context ,
253
296
dbMeta * mydump.MDDatabaseMeta ,
254
297
tblMeta * mydump.MDTableMeta ,
255
298
allDataFiles map [string ]mydump.FileInfo ,
@@ -279,7 +322,7 @@ func (sdk *ImportSDK) buildTableMeta(
279
322
if err != nil {
280
323
return nil , errors .Trace (err )
281
324
}
282
- tableMeta .WildcardPath = wildcard
325
+ tableMeta .WildcardPath = strings . TrimSuffix ( sdk . store . URI (), "/" ) + "/" + wildcard
283
326
284
327
return tableMeta , nil
285
328
}
@@ -289,8 +332,10 @@ func (sdk *ImportSDK) Close() error {
289
332
sdk .mu .Lock ()
290
333
defer sdk .mu .Unlock ()
291
334
292
- // Nothing to close at the moment, but this could be used
293
- // to clean up resources in the future
335
+ // close external storage
336
+ if sdk .store != nil {
337
+ sdk .store .Close ()
338
+ }
294
339
return nil
295
340
}
296
341
@@ -342,7 +387,6 @@ func (sdk *ImportSDK) generateWildcard(
342
387
// Try different pattern generation strategies in order of specificity
343
388
patterns := []string {
344
389
generateMydumperPattern (paths ), // Specific to Mydumper format
345
- generateDirectoryPattern (paths ), // Try directory-based pattern
346
390
generatePrefixSuffixPattern (paths ), // Generic prefix/suffix pattern
347
391
}
348
392
@@ -356,51 +400,17 @@ func (sdk *ImportSDK) generateWildcard(
356
400
return "" , errors .New ("unable to generate a specific wildcard pattern for this table's data files" )
357
401
}
358
402
359
- // generateDirectoryPattern attempts to create a pattern based on directory structure
360
- func generateDirectoryPattern (paths []string ) string {
361
- // Get common directory prefix
362
- dirPrefix := extractCommonDirectory (paths )
363
- if dirPrefix == "" {
364
- return ""
365
- }
366
-
367
- // See if all files are in the same directory
368
- allSameDir := true
369
- for _ , path := range paths {
370
- lastSlash := strings .LastIndex (path , "/" )
371
- if lastSlash < 0 || path [:lastSlash + 1 ] != dirPrefix {
372
- allSameDir = false
373
- break
374
- }
375
- }
376
-
377
- if allSameDir {
378
- // Try to find common filename patterns within the directory
379
- fileNames := make ([]string , len (paths ))
380
- for i , path := range paths {
381
- fileNames [i ] = path [len (dirPrefix ):]
382
- }
383
-
384
- filePrefix := longestCommonPrefix (fileNames )
385
- if filePrefix != "" {
386
- return dirPrefix + filePrefix + "*"
387
- }
388
-
389
- // If no common filename prefix, just use the directory
390
- return dirPrefix + "*"
391
- }
392
-
393
- return ""
394
- }
395
-
396
403
// validatePattern checks if a wildcard pattern matches only the table's files
397
404
func validatePattern (pattern string , tableFiles map [string ]struct {}, allFiles map [string ]mydump.FileInfo ) bool {
398
405
if pattern == "" {
399
406
return false
400
407
}
401
408
402
409
for path := range allFiles {
403
- isMatch := wildcardMatches (pattern , path )
410
+ isMatch , err := filepath .Match (pattern , path )
411
+ if err != nil {
412
+ return false // Invalid pattern
413
+ }
404
414
_ , isTableFile := tableFiles [path ]
405
415
406
416
// If pattern matches a file that's not from our table, it's invalid
@@ -417,23 +427,6 @@ func validatePattern(pattern string, tableFiles map[string]struct{}, allFiles ma
417
427
return true
418
428
}
419
429
420
- // wildcardMatches checks if a path matches a wildcard pattern
421
- // This implementation handles patterns with a single * wildcard
422
- func wildcardMatches (pattern , path string ) bool {
423
- if ! strings .Contains (pattern , "*" ) {
424
- return pattern == path
425
- }
426
-
427
- parts := strings .Split (pattern , "*" )
428
- if len (parts ) != 2 {
429
- // This implementation only handles a single wildcard
430
- return false
431
- }
432
-
433
- prefix , suffix := parts [0 ], parts [1 ]
434
- return strings .HasPrefix (path , prefix ) && strings .HasSuffix (path , suffix ) && len (path ) >= len (prefix )+ len (suffix )
435
- }
436
-
437
430
// generateMydumperPattern creates a pattern optimized for Mydumper naming conventions
438
431
func generateMydumperPattern (paths []string ) string {
439
432
// Check if paths appear to follow Mydumper naming convention
@@ -580,20 +573,19 @@ func generatePrefixSuffixPattern(paths []string) string {
580
573
return paths [0 ]
581
574
}
582
575
583
- // Find common prefix and suffix
584
576
prefix := longestCommonPrefix (paths )
585
577
suffix := longestCommonSuffix (paths )
586
578
587
- // If prefix and suffix would overlap, adjust them
588
- if len (prefix )+ len (suffix ) > len (paths [0 ]) {
589
- overlap := len (prefix ) + len (suffix ) - len (paths [0 ])
590
- suffix = suffix [overlap :]
579
+ minLen := len (paths [0 ])
580
+ for _ , p := range paths [1 :] {
581
+ if len (p ) < minLen {
582
+ minLen = len (p )
583
+ }
591
584
}
592
-
593
- // Construct pattern with appropriate wildcards
594
- if prefix != "" || suffix != "" {
595
- return prefix + "*" + suffix
585
+ maxSuffixLen := minLen - len (prefix )
586
+ if len (suffix ) > maxSuffixLen {
587
+ suffix = suffix [len (suffix )- maxSuffixLen :]
596
588
}
597
589
598
- return ""
590
+ return prefix + "*" + suffix
599
591
}
0 commit comments