Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 14 additions & 1 deletion csharp/src/StatementExecution/StatementExecutionClient.cs
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
using System.Text.Json.Serialization;
using System.Threading;
using System.Threading.Tasks;
using Apache.Arrow.Adbc;

namespace AdbcDrivers.Databricks.StatementExecution
{
Expand Down Expand Up @@ -403,7 +404,19 @@ private async Task EnsureSuccessStatusCodeAsync(HttpResponseMessage response)
errorMessage = $"{errorMessage}. Response: {errorContent}";
}

throw new DatabricksException(errorMessage);
var statusCode = response.StatusCode switch
{
System.Net.HttpStatusCode.Unauthorized or System.Net.HttpStatusCode.Forbidden
=> AdbcStatusCode.Unauthorized,
System.Net.HttpStatusCode.NotFound
=> AdbcStatusCode.NotFound,
System.Net.HttpStatusCode.Conflict
=> AdbcStatusCode.AlreadyExists,
System.Net.HttpStatusCode.BadRequest
=> AdbcStatusCode.InvalidArgument,
_ => AdbcStatusCode.IOError,
};
throw new DatabricksException(errorMessage, statusCode);
}
}
}
77 changes: 62 additions & 15 deletions csharp/src/StatementExecution/StatementExecutionConnection.cs
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,9 @@ private StatementExecutionConnection(
if (_enableMultipleCatalogSupport)
{
properties.TryGetValue(AdbcOptions.Connection.CurrentCatalog, out _catalog);
// Match Thrift behavior: SPARK is a legacy alias — map it to null so the
// runtime falls back to the workspace default (typically hive_metastore).
_catalog = DatabricksConnection.HandleSparkCatalog(_catalog);
}
properties.TryGetValue(AdbcOptions.Connection.CurrentDbSchema, out _schema);

Expand Down Expand Up @@ -380,8 +383,22 @@ public async Task OpenAsync(CancellationToken cancellationToken = default)
SessionConfigs = sessionConfigs.Count > 0 ? sessionConfigs : null
};

var response = await _client.CreateSessionAsync(request, cancellationToken).ConfigureAwait(false);
_sessionId = response.SessionId;
try
{
var response = await _client.CreateSessionAsync(request, cancellationToken).ConfigureAwait(false);
_sessionId = response.SessionId;
}
catch (DatabricksException)
{
throw;
}
catch (Exception ex)
{
throw new DatabricksException(
$"Failed to connect to Databricks: {ex.GetBaseException().Message}",
AdbcStatusCode.IOError,
ex);
}

// If user didn't specify a catalog, discover the server's default.
// In Thrift, the server returns this in OpenSessionResp.InitialNamespace.
Expand Down Expand Up @@ -422,6 +439,18 @@ public override AdbcStatement CreateStatement()
this); // Pass connection as TracingConnection for tracing support
}

public override void SetOption(string key, string? value)
{
switch (key)
{
case AdbcOptions.Telemetry.TraceParent:
SetTraceParent(string.IsNullOrWhiteSpace(value) ? null : value);
return;
}

base.SetOption(key, value);
}

public override IArrowArrayStream GetObjects(GetObjectsDepth depth, string? catalogPattern, string? schemaPattern, string? tableNamePattern, IReadOnlyList<string>? tableTypes, string? columnNamePattern)
{
return this.TraceActivity(activity =>
Expand All @@ -432,6 +461,13 @@ public override IArrowArrayStream GetObjects(GetObjectsDepth depth, string? cata
activity?.SetTag("table_pattern", tableNamePattern ?? "(none)");
activity?.SetTag("column_pattern", columnNamePattern ?? "(none)");

// Databricks identifiers are case-insensitive — lowercase patterns
// to match server behavior (same as DatabricksConnection/Thrift path).
catalogPattern = catalogPattern?.ToLower();
schemaPattern = schemaPattern?.ToLower();
tableNamePattern = tableNamePattern?.ToLower();
columnNamePattern = columnNamePattern?.ToLower();

using var cts = CreateMetadataTimeoutCts();
return GetObjectsResultBuilder.BuildGetObjectsResultAsync(
this, depth, catalogPattern, schemaPattern,
Expand Down Expand Up @@ -557,7 +593,7 @@ async Task<IReadOnlyList<string>> IGetObjectsDataProvider.GetCatalogsAsync(strin
string sql = new ShowSchemasCommand(catalogPattern, schemaPattern).Build();
var batches = await ExecuteMetadataSqlAsync(sql, cancellationToken).ConfigureAwait(false);

// SHOW SCHEMAS IN ALL CATALOGS returns 2 columns: catalog, databaseName
// SHOW SCHEMAS IN ALL CATALOGS returns 2 columns: databaseName, catalog
// SHOW SCHEMAS IN `catalog` returns 1 column: databaseName
bool showSchemasInAllCatalogs = catalogPattern == null;

Expand All @@ -569,8 +605,8 @@ async Task<IReadOnlyList<string>> IGetObjectsDataProvider.GetCatalogsAsync(strin

if (showSchemasInAllCatalogs)
{
catalogArray = batch.Column(0) as StringArray;
schemaArray = batch.Column(1) as StringArray;
schemaArray = batch.Column(0) as StringArray;
catalogArray = batch.Column(1) as StringArray;
}
else
{
Expand Down Expand Up @@ -678,20 +714,25 @@ async Task IGetObjectsDataProvider.PopulateColumnInfoAsync(string? catalogPatter
tableInfo, colName, colType, position, nullable);

// Match Thrift GetObjects behavior: SparkConnection.SetPrecisionScaleAndTypeName
// only sets Precision/Scale for DECIMAL, NUMERIC, CHAR, NCHAR, VARCHAR,
// NVARCHAR, LONGVARCHAR, LONGNVARCHAR. All other types get null.
// sets Precision for DECIMAL, NUMERIC, CHAR, NCHAR, VARCHAR, NVARCHAR,
// LONGVARCHAR, LONGNVARCHAR. Sets Scale only for DECIMAL/NUMERIC.
// All other types get null for both.
int lastIdx = tableInfo.Precision.Count - 1;
short typeCode = tableInfo.ColType[lastIdx];
if (typeCode != (short)HiveServer2Connection.ColumnTypeId.DECIMAL
&& typeCode != (short)HiveServer2Connection.ColumnTypeId.NUMERIC
&& typeCode != (short)HiveServer2Connection.ColumnTypeId.CHAR
&& typeCode != (short)HiveServer2Connection.ColumnTypeId.NCHAR
&& typeCode != (short)HiveServer2Connection.ColumnTypeId.VARCHAR
&& typeCode != (short)HiveServer2Connection.ColumnTypeId.NVARCHAR
&& typeCode != (short)HiveServer2Connection.ColumnTypeId.LONGVARCHAR
&& typeCode != (short)HiveServer2Connection.ColumnTypeId.LONGNVARCHAR)
bool isDecimalOrNumeric = typeCode == (short)HiveServer2Connection.ColumnTypeId.DECIMAL
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why we need to set these to null after PopulateTableInfoFromTypeName is already processed?
Can we just set everything right in PopulateTableInfoFromTypeName?

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Valid suggestion. PopulateTableInfoFromTypeName sets non-null values for ALL types (e.g., Scale=0 for BOOLEAN, Scale=7 for FLOAT). But Thrift's SparkConnection.SetPrecisionScaleAndTypeName only sets Precision/Scale for DECIMAL/NUMERIC/CHAR/VARCHAR types — everything else gets null.

The post-processing null-setting was the original design to match Thrift parity. The cleaner fix would be to update PopulateTableInfoFromTypeName itself, but it's in ColumnMetadataHelper which is also used by FlatColumnsResultBuilder (GetColumns path) where the values ARE needed for all types. So the nulling has to happen at the GetObjects call site, not inside the shared helper.

We could extract this into a separate helper method called after PopulateTableInfoFromTypeName for clarity, but moving the logic inside would change behavior for the GetColumns path.

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I still want to revisit this.
I think we should fix this function to only return scale for decimal there => Currently it seems the FlatColumnsResultBuilder returns scale for non-decimal fields, which seems wrong?
The columnSize seems to be handled correctly already.

There is no usage of the GetObjects call now, so I would like to start correct. The current Thrift GetObjects has NO Databricks customization so it's a very, very basic one; I would not mind diverging from that

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed in 83a9750. Changed GetDecimalDigitsDefault to return null for all non-DECIMAL/NUMERIC types (was returning 0/7/15/6). Now PopulateTableInfoFromTypeName sets Scale to null from the start for non-DECIMAL types, so the post-processing only needs to null out Precision for non-DECIMAL/CHAR types. This also fixes the FlatColumnsResultBuilder (GetColumns) path which was incorrectly returning Scale values for non-DECIMAL columns.

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Further update in c9847c0 — removed the post-processing entirely. GetColumnSizeDefault and GetDecimalDigitsDefault values are now kept for all types in GetObjects, matching GetColumns/GetColumnsExtended. This provides consistent Precision/Scale values across all metadata APIs rather than nulling them for GetObjects only.

|| typeCode == (short)HiveServer2Connection.ColumnTypeId.NUMERIC;
bool isCharType = typeCode == (short)HiveServer2Connection.ColumnTypeId.CHAR
|| typeCode == (short)HiveServer2Connection.ColumnTypeId.NCHAR
|| typeCode == (short)HiveServer2Connection.ColumnTypeId.VARCHAR
|| typeCode == (short)HiveServer2Connection.ColumnTypeId.NVARCHAR
|| typeCode == (short)HiveServer2Connection.ColumnTypeId.LONGVARCHAR
|| typeCode == (short)HiveServer2Connection.ColumnTypeId.LONGNVARCHAR;
if (!isDecimalOrNumeric && !isCharType)
{
tableInfo.Precision[lastIdx] = null;
}
if (!isDecimalOrNumeric)
{
tableInfo.Scale[lastIdx] = null;
}
}
Expand Down Expand Up @@ -767,6 +808,12 @@ internal List<RecordBatch> ExecuteMetadataSql(string sql, CancellationToken canc
/// <summary>
/// Queries the server for the current catalog via SELECT CURRENT_CATALOG().
/// </summary>
/// <summary>
/// Returns the session's default catalog. Used by statements when
/// enableMultipleCatalogSupport=false and no catalog was specified.
/// </summary>
internal string? GetSessionDefaultCatalog() => GetCurrentCatalog();

private string? GetCurrentCatalog()
{
var batches = ExecuteMetadataSql("SELECT CURRENT_CATALOG()");
Expand Down
52 changes: 45 additions & 7 deletions csharp/src/StatementExecution/StatementExecutionStatement.cs
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,11 @@ public StatementExecutionStatement(
_lz4BufferPool = lz4BufferPool ?? throw new ArgumentNullException(nameof(lz4BufferPool));
_httpClient = httpClient ?? throw new ArgumentNullException(nameof(httpClient));
_enableComplexDatatypeSupport = connection.EnableComplexDatatypeSupport;

// Match Thrift: statement starts with connection's default catalog.
// When enableMultipleCatalogSupport=true, this is the catalog from config (e.g. "main").
// When false, _catalog is null (not set from config), matching Thrift behavior.
_metadataCatalogName = catalog;
}

/// <summary>
Expand Down Expand Up @@ -189,6 +194,10 @@ public override void SetOption(string key, string value)
case DatabricksParameters.MaxBytesPerFetchRequest:
break;

case AdbcOptions.Telemetry.TraceParent:
SetTraceParent(string.IsNullOrEmpty(value) ? null : value);
break;

default:
base.SetOption(key, value);
break;
Expand Down Expand Up @@ -615,8 +624,9 @@ public async Task<UpdateResult> ExecuteUpdateAsync(CancellationToken cancellatio
throw new AdbcException("Statement was closed before results could be retrieved");
}

// For updates, we don't need to read the results - just return the row count
long rowCount = response.Manifest?.TotalRowCount ?? 0;
// For updates, we don't need to read the results - just return the row count.
// Default to -1 (unknown) when no manifest/row count, matching Thrift behavior for DDL.
long rowCount = response.Manifest?.TotalRowCount ?? -1;
return new UpdateResult(rowCount);
}

Expand Down Expand Up @@ -791,7 +801,33 @@ private static async Task<byte[]> FetchAllChunksAsync(

// Metadata command routing

private string? EffectiveCatalog => _connection.ResolveEffectiveCatalog(_metadataCatalogName);
/// <summary>
/// Resolves the catalog for metadata SQL commands.
/// Matches Thrift behavior:
/// - SPARK → null (all catalogs)
/// - Other values pass through as-is
/// - null stays null
/// When enableMultipleCatalogSupport=false and result is null,
/// resolves to the session default catalog (SEA SQL requires an explicit
/// catalog for SHOW commands when not querying all catalogs).
/// </summary>
private string? EffectiveCatalog
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I remember you already have a similar function like this?

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes — ResolveEffectiveCatalog on the connection (line 793) still exists and is used by GetTableSchema. This new EffectiveCatalog property is on the statement and replaced the old one-liner _connection.ResolveEffectiveCatalog(_metadataCatalogName).

The reason we can't reuse ResolveEffectiveCatalog is that it does normalized ?? _catalog (always falls back to connection default). That's wrong when the user explicitly SetOption(CatalogName, "SPARK") — that should mean "all catalogs" (null), not fall back to default.

The statement's EffectiveCatalog has different semantics:

  • enableMultipleCatalogSupport=true: passes null through (all catalogs) when SPARK is set
  • enableMultipleCatalogSupport=false: resolves null to session default via GetSessionDefaultCatalog()

The connection's ResolveEffectiveCatalog is still correct for GetTableSchema where you always need a concrete catalog (you can't do SHOW COLUMNS IN ALL CATALOGS).

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it only SHOW COLUMNS IN ALL CATALOGS that is not permitted?
What about SHOW TABLES IN ALL CATALOGS, which is called from the statement level?

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We support SHOW TABLES IN ALL CATALOGS. For SHOW COLUMNS, the PR is merged in runtime but not yet released

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK, so when that is released, we can change the connection-level ResolveEffectiveCatalog to use this one? Let's add a comment if so.

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good point. Added a TODO in f7694ed. Once the backend supports SHOW COLUMNS IN ALL CATALOGS, we can consolidate. Right now the connection's ResolveEffectiveCatalog is only used by GetTableSchema (which always needs a concrete catalog), so it still needs the fallback. The PopulateColumnInfoAsync path now uses ExecuteShowColumnsAsync which handles null catalog via the iterate-all-catalogs fallback.

{
get
{
// Normalize SPARK → null, same as Thrift's HandleSparkCatalog
string? catalog = DatabricksConnection.HandleSparkCatalog(_metadataCatalogName);

if (_connection.EnableMultipleCatalogSupport)
{
// null means "all catalogs" (e.g. SHOW SCHEMAS IN ALL CATALOGS)
return catalog;
}

// flag=false: null means use session default (SEA SQL needs explicit catalog)
return catalog ?? _connection.GetSessionDefaultCatalog();
}
}

/// <summary>
/// Escapes wildcard characters (_ and %) in metadata name parameters when
Expand Down Expand Up @@ -836,7 +872,9 @@ private async Task<QueryResult> GetCatalogsAsync(CancellationToken cancellationT
return new QueryResult(1, new HiveInfoArrowStream(catalogSchema, new IArrowArray[] { sparkBuilder.Build() }));
}

string sql = new ShowCatalogsCommand(EscapePatternWildcardsInName(_metadataCatalogName)).Build();
// GetCatalogs returns all catalogs — no filtering by pattern,
// matching Thrift behavior (Thrift RPC has no catalog filter for GetCatalogs).
string sql = new ShowCatalogsCommand(null).Build();
activity?.SetTag("sql_query", sql);
var batches = await _connection.ExecuteMetadataSqlAsync(sql, cancellationToken).ConfigureAwait(false);

Expand Down Expand Up @@ -882,7 +920,7 @@ private async Task<QueryResult> GetSchemasAsync(CancellationToken cancellationTo
activity?.SetTag("sql_query", sql);
var batches = await _connection.ExecuteMetadataSqlAsync(sql, cancellationToken).ConfigureAwait(false);

// SHOW SCHEMAS IN ALL CATALOGS returns 2 columns: catalog_name, databaseName
// SHOW SCHEMAS IN ALL CATALOGS returns 2 columns: databaseName, catalog
// SHOW SCHEMAS IN `catalog` returns 1 column: databaseName
bool showAllCatalogs = catalog == null;

Expand All @@ -896,8 +934,8 @@ private async Task<QueryResult> GetSchemasAsync(CancellationToken cancellationTo

if (showAllCatalogs)
{
catalogArray = batch.Column(0) as StringArray;
schemaArray = batch.Column(1) as StringArray;
schemaArray = batch.Column(0) as StringArray;
catalogArray = batch.Column(1) as StringArray;
}
else
{
Expand Down
Loading