Skip to content

feat(csharp/src/Drivers/BigQuery): support evaluation kind and statement type setting #2698

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion csharp/src/Drivers/BigQuery/BigQueryConnection.cs
Original file line number Diff line number Diff line change
Expand Up @@ -1058,7 +1058,10 @@ private IReadOnlyDictionary<string, string> ParseOptions()
BigQueryParameters.LargeDecimalsAsString,
BigQueryParameters.LargeResultsDestinationTable,
BigQueryParameters.GetQueryResultsOptionsTimeout,
BigQueryParameters.MaxFetchConcurrency
BigQueryParameters.MaxFetchConcurrency,
BigQueryParameters.StatementType,
BigQueryParameters.StatementIndex,
BigQueryParameters.EvaluationKind
};

foreach (string key in statementOptions)
Expand Down
3 changes: 3 additions & 0 deletions csharp/src/Drivers/BigQuery/BigQueryParameters.cs
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,9 @@ public class BigQueryParameters
public const string GetQueryResultsOptionsTimeout = "adbc.bigquery.get_query_results_options.timeout";
public const string MaxFetchConcurrency = "adbc.bigquery.max_fetch_concurrency";
public const string IncludePublicProjectId = "adbc.bigquery.include_public_project_id";
public const string StatementType = "adbc.bigquery.multiple_statement.statement_type";
public const string StatementIndex = "adbc.bigquery.multiple_statement.statement_index";
public const string EvaluationKind = "adbc.bigquery.multiple_statement.evaluation_kind";
}

/// <summary>
Expand Down
61 changes: 38 additions & 23 deletions csharp/src/Drivers/BigQuery/BigQueryStatement.cs
Original file line number Diff line number Diff line change
Expand Up @@ -51,52 +51,71 @@ public BigQueryStatement(BigQueryClient client, GoogleCredential credential)

public override QueryResult ExecuteQuery()
{
// Create job
QueryOptions queryOptions = ValidateOptions();

BigQueryJob job = this.client.CreateQueryJob(SqlQuery, null, queryOptions);

// Get results
GetQueryResultsOptions getQueryResultsOptions = new GetQueryResultsOptions();

if (this.Options?.TryGetValue(BigQueryParameters.GetQueryResultsOptionsTimeout, out string? timeoutSeconds) == true &&
int.TryParse(timeoutSeconds, out int seconds) &&
seconds >= 0)
{
getQueryResultsOptions.Timeout = TimeSpan.FromSeconds(seconds);
}

BigQueryResults results = job.GetQueryResults(getQueryResultsOptions);

BigQueryReadClientBuilder readClientBuilder = new BigQueryReadClientBuilder();
readClientBuilder.Credential = this.credential;
BigQueryReadClient readClient = readClientBuilder.Build();

// For multi-statement queries, the results.TableReference is null
if (results.TableReference == null)
{
// To get the results of all statements in a multi-statement query, enumerate the child jobs and call jobs.getQueryResults on each of them.
// Related public docs: https://cloud.google.com/bigquery/docs/multi-statement-queries#get_all_executed_statements
string statementType = string.Empty;
if (this.Options?.TryGetValue(BigQueryParameters.StatementType, out string? statementTypeString) == true)
{
statementType = statementTypeString;
}
int statementIndex = 1;
if (this.Options?.TryGetValue(BigQueryParameters.StatementIndex, out string? statementIndexString) == true &&
int.TryParse(statementIndexString, out int statementIndexInt) &&
statementIndexInt > 0)
{
statementIndex = statementIndexInt;
}
string evaluationKind = string.Empty;
if (this.Options?.TryGetValue(BigQueryParameters.EvaluationKind, out string? evaluationKindString) == true)
{
evaluationKind = evaluationKindString;
}

// To get the results of all statements in a multi-statement query, enumerate the child jobs. Related public docs: https://cloud.google.com/bigquery/docs/multi-statement-queries#get_all_executed_statements.
// Can filter by StatementType and EvaluationKind. Related public docs: https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#jobstatistics2, https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#evaluationkind
ListJobsOptions listJobsOptions = new ListJobsOptions();
listJobsOptions.ParentJobId = results.JobReference.JobId;
PagedEnumerable<JobList, BigQueryJob> joblist = client.ListJobs(listJobsOptions);
BigQueryJob firstQueryJob = new BigQueryJob(client, job.Resource);
foreach (BigQueryJob childJob in joblist)
var joblist = client.ListJobs(listJobsOptions)
.Select(job => client.GetJob(job.Reference))
.Where(job => string.IsNullOrEmpty(evaluationKind) || job.Statistics.ScriptStatistics.EvaluationKind.Equals(evaluationKind, StringComparison.OrdinalIgnoreCase))
.Where(job => string.IsNullOrEmpty(statementType) || job.Statistics.Query.StatementType.Equals(statementType,StringComparison.OrdinalIgnoreCase))
.OrderBy(job => job.Resource.Statistics.CreationTime)
.ToList();

if (joblist.Count > 0)
{
var tempJob = client.GetJob(childJob.Reference);
var query = tempJob.Resource?.Configuration?.Query;
if (query != null && query.DestinationTable != null && query.DestinationTable.ProjectId != null && query.DestinationTable.DatasetId != null && query.DestinationTable.TableId != null)
if (statementIndex < 1 || statementIndex > joblist.Count)
{
firstQueryJob = tempJob;
throw new ArgumentOutOfRangeException($"The specified index {statementIndex} is out of range. There are {joblist.Count} jobs available.");
}
results = joblist[statementIndex - 1].GetQueryResults(getQueryResultsOptions);
}
results = firstQueryJob.GetQueryResults();
}

if (results.TableReference == null)
{
throw new AdbcException("There is no query statement");
}

// BigQuery Read Client for streaming
BigQueryReadClientBuilder readClientBuilder = new BigQueryReadClientBuilder();
readClientBuilder.Credential = this.credential;
BigQueryReadClient readClient = readClientBuilder.Build();
string table = $"projects/{results.TableReference.ProjectId}/datasets/{results.TableReference.DatasetId}/tables/{results.TableReference.TableId}";

int maxStreamCount = 1;
if (this.Options?.TryGetValue(BigQueryParameters.MaxFetchConcurrency, out string? maxStreamCountString) == true)
{
Expand All @@ -110,16 +129,12 @@ public override QueryResult ExecuteQuery()
}
ReadSession rs = new ReadSession { Table = table, DataFormat = DataFormat.Arrow };
ReadSession rrs = readClient.CreateReadSession("projects/" + results.TableReference.ProjectId, rs, maxStreamCount);

long totalRows = results.TotalRows == null ? -1L : (long)results.TotalRows.Value;

var readers = rrs.Streams
.Select(s => ReadChunk(readClient, s.Name))
.Where(chunk => chunk != null)
.Cast<IArrowReader>();

IArrowArrayStream stream = new MultiArrowReader(TranslateSchema(results.Schema), readers);

return new QueryResult(totalRows, stream);
}

Expand Down
9 changes: 9 additions & 0 deletions csharp/src/Drivers/BigQuery/readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,15 @@ https://cloud.google.com/dotnet/docs/reference/Google.Cloud.BigQuery.V2/latest/G
**adbc.bigquery.max_fetch_concurrency**<br>
&nbsp;&nbsp;&nbsp;&nbsp;Optional. Sets the [maxStreamCount](https://cloud.google.com/dotnet/docs/reference/Google.Cloud.BigQuery.Storage.V1/latest/Google.Cloud.BigQuery.Storage.V1.BigQueryReadClient#Google_Cloud_BigQuery_Storage_V1_BigQueryReadClient_CreateReadSession_System_String_Google_Cloud_BigQuery_Storage_V1_ReadSession_System_Int32_Google_Api_Gax_Grpc_CallSettings_) for the CreateReadSession method. If not set, defaults to 1.

**adbc.bigquery.multiple_statement.statement_type**<br>
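&nbsp;&nbsp;&nbsp;&nbsp;Optional. When executing a multi-statement query, only statements of this type (for example, `SELECT`) are considered when selecting the result to return. The comparison is case-insensitive. If not set, statements of all types are considered.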

**adbc.bigquery.multiple_statement.statement_index**<br>
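&nbsp;&nbsp;&nbsp;&nbsp;Optional. When executing a multi-statement query, the 1-based index of the statement whose result is returned, counted among the statements that remain after the statement type and evaluation kind filters are applied (minimum value is 1). If not set, the result of the first statement is returned.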

**adbc.bigquery.multiple_statement.evaluation_kind**<br>
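&nbsp;&nbsp;&nbsp;&nbsp;Optional. When executing a multi-statement query, only statements with this evaluation kind (for example, `STATEMENT`) are considered when selecting the result to return. The comparison is case-insensitive. If not set, statements of all evaluation kinds are considered.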

**adbc.bigquery.include_constraints_getobjects**<br>
&nbsp;&nbsp;&nbsp;&nbsp;Optional. Some callers do not need the constraint details when they get the table information and can improve the speed of obtaining the results. Setting this value to `"false"` will not include the constraint details. The default value is `"true"`.

Expand Down
9 changes: 9 additions & 0 deletions csharp/test/Drivers/BigQuery/BigQueryTestConfiguration.cs
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,15 @@ public BigQueryTestEnvironment()
[JsonPropertyName("maxStreamCount")]
public int? MaxStreamCount { get; set; }

[JsonPropertyName("statementType")]
public string StatementType { get; set; } = string.Empty;

[JsonPropertyName("statementIndex")]
public int? StatementIndex { get; set; }

[JsonPropertyName("evaluationKind")]
public string EvaluationKind { get; set; } = string.Empty;

/// <summary>
/// How structs should be handled by the ADO.NET client for this environment.
/// </summary>
Expand Down
15 changes: 15 additions & 0 deletions csharp/test/Drivers/BigQuery/BigQueryTestingUtils.cs
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,21 @@ internal static Dictionary<string, string> GetBigQueryParameters(BigQueryTestEnv
parameters.Add(BigQueryParameters.MaxFetchConcurrency, testEnvironment.MaxStreamCount.Value.ToString());
}

if (!string.IsNullOrEmpty(testEnvironment.StatementType))
{
parameters.Add(BigQueryParameters.StatementType, testEnvironment.StatementType);
}

if (testEnvironment.StatementIndex.HasValue)
{
parameters.Add(BigQueryParameters.StatementIndex, testEnvironment.StatementIndex.Value.ToString());
}

if (!string.IsNullOrEmpty(testEnvironment.EvaluationKind))
{
parameters.Add(BigQueryParameters.EvaluationKind, testEnvironment.EvaluationKind);
}

return parameters;
}

Expand Down
23 changes: 23 additions & 0 deletions csharp/test/Drivers/BigQuery/DriverTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -346,5 +346,28 @@ public void QueryTimeoutTest()
}
}
}

/// <summary>
/// Validates that the driver can connect to a live server, execute a
/// multi-statement query, and read back the result of one of its statements.
/// </summary>
/// <remarks>
/// The query consists of two SELECT statements separated by <c>;</c>. The
/// result is checked with <c>Tests.DriverTests.CanExecuteQuery</c> for every
/// configured test environment.
/// </remarks>
[SkippableFact, Order(9)]
public void CanExecuteMultiStatementQuery()
{
    foreach (BigQueryTestEnvironment environment in _environments)
    {
        // Dispose the connection and statement at the end of each iteration so
        // resources are not leaked when several environments are configured.
        using AdbcConnection adbcConnection = GetAdbcConnection(environment.Name);
        using AdbcStatement statement = adbcConnection.CreateStatement();

        string query1 = "SELECT * FROM bigquery-public-data.covid19_ecdc.covid_19_geographic_distribution_worldwide";
        string query2 = "SELECT " +
            "CAST(1.7976931348623157e+308 as FLOAT64) as number, " +
            "PARSE_NUMERIC(\"9.99999999999999999999999999999999E+28\") as decimal, " +
            "PARSE_BIGNUMERIC(\"5.7896044618658097711785492504343953926634992332820282019728792003956564819968E+37\") as big_decimal";

        // Both statements are terminated with ';' so the server treats the text as a multi-statement query.
        statement.SqlQuery = $"{query1};{query2};";

        QueryResult queryResult = statement.ExecuteQuery();

        // NOTE(review): 61900 is presumably the expected row count of the first
        // statement (the public COVID-19 table) - confirm against CanExecuteQuery
        // and against any statement index/type configured for the environment.
        Tests.DriverTests.CanExecuteQuery(queryResult, 61900, environment.Name);
    }
}
}
}
3 changes: 3 additions & 0 deletions csharp/test/Drivers/BigQuery/readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,9 @@ The following values can be setup in the configuration
- **scopes** - Comma separated list (string) of scopes applied during the test.
- **queryTimeout** - The timeout (in seconds) for a query. Similar to a CommandTimeout.
- **maxStreamCount** - The max stream count.
- **statementType** - When executing a multi-statement query, only statements of this type are considered when selecting the result to return.
- **statementIndex** - When executing a multi-statement query, the 1-based index of the statement whose result is returned.
- **evaluationKind** - When executing a multi-statement query, only statements with this evaluation kind are considered when selecting the result to return.
- **includeTableConstraints** - Whether to include table constraints in the GetObjects query.
- **largeResultsDestinationTable** - Sets the [DestinationTable](https://cloud.google.com/dotnet/docs/reference/Google.Cloud.BigQuery.V2/latest/Google.Cloud.BigQuery.V2.QueryOptions#Google_Cloud_BigQuery_V2_QueryOptions_DestinationTable) value of the QueryOptions if configured. Expects the format to be `{projectId}.{datasetId}.{tableId}` to set the corresponding values in the [TableReference](https://github.com/googleapis/google-api-dotnet-client/blob/6c415c73788b848711e47c6dd33c2f93c76faf97/Src/Generated/Google.Apis.Bigquery.v2/Google.Apis.Bigquery.v2.cs#L9348) class.
- **allowLargeResults** - Whether to allow large results.
Expand Down
Loading