Skip to content

Commit 9ba6bdb

Browse files
feat(csharp/src/Drivers): Add Databricks driver (#2672)
After the initial Spark work, the Databricks-specific features were becoming more entwined in the code. There have been requests for additional Databricks-specific features such as CloudFetch, Entra authentication, and LZ4 compression to name a few. This PR moves the Databricks-specific code out of the Spark driver and into its own driver. It leverages the Thrift and Spark libraries that exist in the Apache driver but teases out the Databricks capabilities from the Spark capabilities. It does so by exposing the internals of the Apache driver so that the Databricks driver can leverage the same underlying libraries while still being able to offer differentiating features and raises the branding of the driver from under the Apache/Spark umbrella to the same level as BigQuery, FlightSQL, and Snowflake. --------- Co-authored-by: David Coe <> Co-authored-by: Bruce Irschick <[email protected]>
1 parent fc3ff2f commit 9ba6bdb

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

58 files changed

+1833
-427
lines changed

csharp/Apache.Arrow.Adbc.sln

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,10 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Apache.Arrow.Adbc.Tests.Dri
3838
EndProject
3939
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Benchmarks", "Benchmarks\Benchmarks.csproj", "{BAF2CF14-BA77-429E-AF54-A34B978E9F5C}"
4040
EndProject
41+
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Apache.Arrow.Adbc.Drivers.Databricks", "src\Drivers\Databricks\Apache.Arrow.Adbc.Drivers.Databricks.csproj", "{25042111-6B86-8B75-7EF6-5BFAA36F72B1}"
42+
EndProject
43+
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Apache.Arrow.Adbc.Tests.Drivers.Databricks", "test\Drivers\Databricks\Apache.Arrow.Adbc.Tests.Drivers.Databricks.csproj", "{BA07EB2C-5246-EB72-153C-493C7E7412D2}"
44+
EndProject
4145
Global
4246
GlobalSection(SolutionConfigurationPlatforms) = preSolution
4347
Debug|Any CPU = Debug|Any CPU
@@ -100,6 +104,14 @@ Global
100104
{BAF2CF14-BA77-429E-AF54-A34B978E9F5C}.Debug|Any CPU.Build.0 = Debug|Any CPU
101105
{BAF2CF14-BA77-429E-AF54-A34B978E9F5C}.Release|Any CPU.ActiveCfg = Release|Any CPU
102106
{BAF2CF14-BA77-429E-AF54-A34B978E9F5C}.Release|Any CPU.Build.0 = Release|Any CPU
107+
{25042111-6B86-8B75-7EF6-5BFAA36F72B1}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
108+
{25042111-6B86-8B75-7EF6-5BFAA36F72B1}.Debug|Any CPU.Build.0 = Debug|Any CPU
109+
{25042111-6B86-8B75-7EF6-5BFAA36F72B1}.Release|Any CPU.ActiveCfg = Release|Any CPU
110+
{25042111-6B86-8B75-7EF6-5BFAA36F72B1}.Release|Any CPU.Build.0 = Release|Any CPU
111+
{BA07EB2C-5246-EB72-153C-493C7E7412D2}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
112+
{BA07EB2C-5246-EB72-153C-493C7E7412D2}.Debug|Any CPU.Build.0 = Debug|Any CPU
113+
{BA07EB2C-5246-EB72-153C-493C7E7412D2}.Release|Any CPU.ActiveCfg = Release|Any CPU
114+
{BA07EB2C-5246-EB72-153C-493C7E7412D2}.Release|Any CPU.Build.0 = Release|Any CPU
103115
EndGlobalSection
104116
GlobalSection(SolutionProperties) = preSolution
105117
HideSolutionNode = FALSE
@@ -119,6 +131,8 @@ Global
119131
{77D5A92F-4136-4DE7-81F4-43B981223280} = {FEB257A0-4FD3-495E-9A47-9E1649755445}
120132
{5B27FB02-D4AE-4ACB-AD88-5E64EEB61729} = {C7290227-E925-47E7-8B6B-A8B171645D58}
121133
{BAF2CF14-BA77-429E-AF54-A34B978E9F5C} = {5BD04C26-CE52-4893-8C1A-479705195CEF}
134+
{25042111-6B86-8B75-7EF6-5BFAA36F72B1} = {FEB257A0-4FD3-495E-9A47-9E1649755445}
135+
{BA07EB2C-5246-EB72-153C-493C7E7412D2} = {C7290227-E925-47E7-8B6B-A8B171645D58}
122136
EndGlobalSection
123137
GlobalSection(ExtensibilityGlobals) = postSolution
124138
SolutionGuid = {4795CF16-0FDB-4BE0-9768-5CF31564DC03}

csharp/src/Drivers/Apache/Apache.Arrow.Adbc.Drivers.Apache.csproj

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,11 @@
1-
<Project Sdk="Microsoft.NET.Sdk">
1+
<Project Sdk="Microsoft.NET.Sdk">
22

33
<PropertyGroup>
44
<TargetFrameworks>netstandard2.0;net472;net6.0</TargetFrameworks>
55
</PropertyGroup>
66

77
<ItemGroup>
88
<PackageReference Include="ApacheThrift" Version="0.21.0" />
9-
<PackageReference Include="K4os.Compression.LZ4" Version="1.3.8" />
10-
<PackageReference Include="K4os.Compression.LZ4.Streams" Version="1.3.8" />
119
<PackageReference Include="System.Net.Http" Version="4.3.4" />
1210
<PackageReference Include="System.Text.Json" Version="8.0.5" />
1311
</ItemGroup>
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
using System.Runtime.CompilerServices;
19+
20+
[assembly: InternalsVisibleTo("Apache.Arrow.Adbc.Drivers.Databricks, PublicKey=0024000004800000940000000602000000240000525341310004000001000100e504183f6d470d6b67b6d19212be3e1f598f70c246a120194bc38130101d0c1853e4a0f2232cb12e37a7a90e707aabd38511dac4f25fcb0d691b2aa265900bf42de7f70468fc997551a40e1e0679b605aa2088a4a69e07c117e988f5b1738c570ee66997fba02485e7856a49eca5fd0706d09899b8312577cbb9034599fc92d4")]
21+
[assembly: InternalsVisibleTo("Apache.Arrow.Adbc.Tests.Drivers.Databricks, PublicKey=0024000004800000940000000602000000240000525341310004000001000100e504183f6d470d6b67b6d19212be3e1f598f70c246a120194bc38130101d0c1853e4a0f2232cb12e37a7a90e707aabd38511dac4f25fcb0d691b2aa265900bf42de7f70468fc997551a40e1e0679b605aa2088a4a69e07c117e988f5b1738c570ee66997fba02485e7856a49eca5fd0706d09899b8312577cbb9034599fc92d4")]

csharp/src/Drivers/Apache/Hive2/HiveServer2HttpConnection.cs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@
2121
using System.Net;
2222
using System.Net.Http;
2323
using System.Net.Http.Headers;
24-
using System.Net.Security;
2524
using System.Text;
2625
using System.Threading;
2726
using System.Threading.Tasks;

csharp/src/Drivers/Apache/Hive2/HiveServer2TlsImpl.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,11 +16,11 @@
1616
*/
1717

1818
using System;
19-
using System.IO;
2019
using System.Collections.Generic;
20+
using System.IO;
21+
using System.Net.Http;
2122
using System.Net.Security;
2223
using System.Security.Cryptography.X509Certificates;
23-
using System.Net.Http;
2424

2525
namespace Apache.Arrow.Adbc.Drivers.Apache.Hive2
2626
{

csharp/src/Drivers/Apache/Impala/ImpalaConnection.cs

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,11 +17,9 @@
1717

1818
using System;
1919
using System.Collections.Generic;
20-
using System.Linq;
2120
using System.Threading;
2221
using System.Threading.Tasks;
2322
using Apache.Arrow.Adbc.Drivers.Apache.Hive2;
24-
using Apache.Arrow.Ipc;
2523
using Apache.Hive.Service.Rpc.Thrift;
2624

2725
namespace Apache.Arrow.Adbc.Drivers.Apache.Impala

csharp/src/Drivers/Apache/Impala/ImpalaHttpConnection.cs

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,12 +17,10 @@
1717

1818
using System;
1919
using System.Collections.Generic;
20-
using System.Diagnostics;
2120
using System.Globalization;
2221
using System.Net;
2322
using System.Net.Http;
2423
using System.Net.Http.Headers;
25-
using System.Net.Security;
2624
using System.Text;
2725
using System.Threading;
2826
using System.Threading.Tasks;

csharp/src/Drivers/Apache/Impala/ImpalaStandardConnection.cs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@
1717

1818
using System;
1919
using System.Collections.Generic;
20-
using System.Diagnostics;
2120
using System.Net;
2221
using System.Threading;
2322
using System.Threading.Tasks;

csharp/src/Drivers/Apache/Spark/README.md

Lines changed: 1 addition & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ but can also be passed in the call to `AdbcDatabase.Connect`.
2626

2727
| Property | Description | Default |
2828
| :--- | :--- | :--- |
29-
| `adbc.spark.type` | (Required) Indicates the Spark server type. One of `databricks`, `http` (future: `standard`) | |
29+
| `adbc.spark.type` | (Required) Indicates the Spark server type. Currently only `http` (future: `standard`) | |
3030
| `adbc.spark.auth_type` | An indicator of the intended type of authentication. Allowed values: `none`, `username_only`, `basic`, and `token`. This property is optional. The authentication type can be inferred from `token`, `username`, and `password`. If a `token` value is provided, token authentication is used. Otherwise, if both `username` and `password` values are provided, basic authentication is used. | |
3131
| `adbc.spark.host` | Host name for the data source. Do not include scheme or port number. Example: `sparkserver.region.cloudapp.azure.com` | |
3232
| `adbc.spark.port` | The port number the data source listens on for new connections. | `443` |
@@ -67,33 +67,6 @@ The `adbc.apache.statement.polltime_ms` specifies the time between polls to the
6767

6868
The following table depicts how the Spark ADBC driver converts a Spark type to an Arrow type and a .NET type:
6969

70-
### Spark on Databricks
71-
72-
| Spark Type | Arrow Type | C# Type |
73-
| :--- | :---: | :---: |
74-
| ARRAY* | String | string |
75-
| BIGINT | Int64 | long |
76-
| BINARY | Binary | byte[] |
77-
| BOOLEAN | Boolean | bool |
78-
| CHAR | String | string |
79-
| DATE | Date32 | DateTime |
80-
| DECIMAL | Decimal128 | SqlDecimal |
81-
| DOUBLE | Double | double |
82-
| FLOAT | Float | float |
83-
| INT | Int32 | int |
84-
| INTERVAL_DAY_TIME+ | String | string |
85-
| INTERVAL_YEAR_MONTH+ | String | string |
86-
| MAP* | String | string |
87-
| NULL | Null | null |
88-
| SMALLINT | Int16 | short |
89-
| STRING | String | string |
90-
| STRUCT* | String | string |
91-
| TIMESTAMP | Timestamp | DateTimeOffset |
92-
| TINYINT | Int8 | sbyte |
93-
| UNION | String | string |
94-
| USER_DEFINED | String | string |
95-
| VARCHAR | String | string |
96-
9770
### Apache Spark over HTTP (adbc.spark.data_type_conv = ?)
9871

9972
| Spark Type | Arrow Type (`none`) | C# Type (`none`) | Arrow Type (`scalar`) | C# Type (`scalar`) |
@@ -126,14 +99,6 @@ The following table depicts how the Spark ADBC driver converts a Spark type to a
12699

127100
## Supported Variants
128101

129-
### Spark on Databricks
130-
131-
Support for Spark on Databricks is the most mature.
132-
133-
The Spark ADBC driver supports token-based authentication using the
134-
[Databricks personal access token](https://docs.databricks.com/en/dev-tools/auth/pat.html).
135-
Basic (username and password) authentication is not supported at this time.
136-
137102
### Apache Spark over HTTP
138103

139104
Support for Spark over HTTP is initial.

csharp/src/Drivers/Apache/Spark/SparkConnection.cs

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -17,15 +17,9 @@
1717

1818
using System;
1919
using System.Collections.Generic;
20-
using System.Linq;
2120
using System.Threading;
22-
using System.Threading.Tasks;
2321
using Apache.Arrow.Adbc.Drivers.Apache.Hive2;
24-
using Apache.Arrow.Adbc.Extensions;
25-
using Apache.Arrow.Ipc;
26-
using Apache.Arrow.Types;
2722
using Apache.Hive.Service.Rpc.Thrift;
28-
using Thrift.Transport;
2923

3024
namespace Apache.Arrow.Adbc.Drivers.Apache.Spark
3125
{

0 commit comments

Comments
 (0)