Skip to content

Commit 047c3ba

Browse files
authored
fix(bq2bq): handle BQ dry run error caused by self table SQL reference (#5)
1 parent 631527a commit 047c3ba

File tree

2 files changed

+24
-42
lines changed

2 files changed

+24
-42
lines changed

task/bq2bq/main.go

Lines changed: 20 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -389,16 +389,30 @@ func (b *BQ2BQ) GenerateDependencies(ctx context.Context, request models.Generat
389389
return nil, errors.New("empty sql file")
390390
}
391391

392+
selfTable, err := b.GenerateDestination(ctx, models.GenerateDestinationRequest{
393+
Config: request.Config,
394+
Assets: request.Assets,
395+
Project: request.Project,
396+
})
397+
if err != nil {
398+
return response, err
399+
}
400+
392401
// first parse sql statement to find dependencies and ignored tables
393-
parsedDependencies, ignoredDependencies, err := b.FindDependenciesWithRegex(ctx, request)
402+
parsedDependencies, ignoredDependencies, err := b.FindDependenciesWithRegex(ctx, queryData.Value, selfTable.Destination)
394403
if err != nil {
395404
return response, err
396405
}
397406

398407
// try to resolve referenced tables directly from BQ APIs
399408
response.Dependencies, err = b.FindDependenciesWithRetryableDryRun(timeoutCtx, queryData.Value, svcAcc)
400409
if err != nil {
401-
return response, err
410+
// A SQL query that references its own destination table (e.g. DML or a self join) creates a
411+
// dependency cycle on dry run, because the destination table might not exist yet. We inspect
412+
// the BQ error and ignore it only when it reports that the destination table was not found.
413+
if !strings.Contains(err.Error(), fmt.Sprintf("Not found: Table %s was not found", selfTable.Destination)) {
414+
return response, err
415+
}
402416
}
403417

404418
if len(response.Dependencies) == 0 {
@@ -443,15 +457,6 @@ func (b *BQ2BQ) GenerateDependencies(ctx context.Context, request models.Generat
443457
}
444458
}
445459

446-
// before returning remove self
447-
selfTable, err := b.GenerateDestination(ctx, models.GenerateDestinationRequest{
448-
Config: request.Config,
449-
Assets: request.Assets,
450-
Project: request.Project,
451-
})
452-
if err != nil {
453-
return response, err
454-
}
455460
response.Dependencies = removeString(response.Dependencies, selfTable.Destination)
456461

457462
// before returning remove ignored tables
@@ -465,7 +470,7 @@ func (b *BQ2BQ) GenerateDependencies(ctx context.Context, request models.Generat
465470

466471
// FindDependenciesWithRegex looks for table patterns in the SQL query to find
467472
// source tables.
468-
// Config is required to generate destination and avoid cycles
473+
// Task destination is required to avoid cycles
469474
//
470475
// we look for certain patterns in the query source code
471476
// in particular, we look for the following constructs
@@ -486,29 +491,17 @@ func (b *BQ2BQ) GenerateDependencies(ctx context.Context, request models.Generat
486491
// they're a single sequence of characters. But on the other hand
487492
// this also means that an otherwise valid reference to "dataset.table"
488493
// will not be recognised.
489-
func (b *BQ2BQ) FindDependenciesWithRegex(ctx context.Context, request models.GenerateDependenciesRequest) ([]string, []string, error) {
494+
func (b *BQ2BQ) FindDependenciesWithRegex(ctx context.Context, queryString string, destination string) ([]string, []string, error) {
490495

491-
queryData, ok := request.Assets.Get(QueryFileName)
492-
if !ok {
493-
return nil, nil, errors.New("empty sql file")
494-
}
495-
queryString := queryData.Value
496496
tablesFound := make(map[string]bool)
497497
pseudoTables := make(map[string]bool)
498498
var tablesIgnored []string
499499

500500
// we mark destination as a pseudo table to avoid a dependency
501501
// cycle. This is for supporting DML queries that may also refer
502502
// to themselves.
503-
dest, err := b.GenerateDestination(ctx, models.GenerateDestinationRequest{
504-
Config: request.Config,
505-
Assets: request.Assets,
506-
Project: request.Project,
507-
})
508-
if err != nil {
509-
return nil, nil, err
510-
}
511-
pseudoTables[dest.Destination] = true
503+
504+
pseudoTables[destination] = true
512505

513506
// remove comments from query
514507
matches := queryCommentPatterns.FindAllStringSubmatch(queryString, -1)

task/bq2bq/main_test.go

Lines changed: 4 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -357,23 +357,12 @@ func TestBQ2BQ(t *testing.T) {
357357
Value: test.Query,
358358
},
359359
})),
360-
Config: models.PluginConfigs{}.FromJobSpec(models.JobSpecConfigs{
361-
{
362-
Name: "PROJECT",
363-
Value: "proj",
364-
},
365-
{
366-
Name: "DATASET",
367-
Value: "datas",
368-
},
369-
{
370-
Name: "TABLE",
371-
Value: "tab",
372-
},
373-
}),
374360
}
361+
362+
queryData, _ := data.Assets.Get(QueryFileName)
363+
destination := "proj.datas.tab"
375364
b2b := &BQ2BQ{}
376-
deps, ignored, err := b2b.FindDependenciesWithRegex(context.Background(), data)
365+
deps, ignored, err := b2b.FindDependenciesWithRegex(context.Background(), queryData.Value, destination)
377366
assert.Nil(t, err)
378367
assert.Equal(t, test.Sources, newSet(deps...))
379368
assert.Equal(t, test.Ignored, newSet(ignored...))

0 commit comments

Comments
 (0)