-
Notifications
You must be signed in to change notification settings - Fork 12
Expand file tree
/
Copy pathDatabricksParameters.cs
More file actions
481 lines (417 loc) · 22.8 KB
/
DatabricksParameters.cs
File metadata and controls
481 lines (417 loc) · 22.8 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
/*
* Copyright (c) 2025 ADBC Drivers Contributors
*
* This file has been modified from its original version, which is
* under the Apache License:
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
using AdbcDrivers.HiveServer2.Spark;
namespace AdbcDrivers.Databricks
{
/// <summary>
/// Parameters used for connecting to Databricks data sources.
/// </summary>
public class DatabricksParameters : SparkParameters
{
// CloudFetch configuration parameters
/// <summary>
/// Whether to use CloudFetch for retrieving results.
/// Default value is true if not specified.
/// </summary>
public const string UseCloudFetch = "adbc.databricks.cloudfetch.enabled";
/// <summary>
/// Whether the client can decompress LZ4 compressed results.
/// Default value is true if not specified.
/// </summary>
public const string CanDecompressLz4 = "adbc.databricks.cloudfetch.lz4.enabled";
/// <summary>
/// Maximum bytes per file for CloudFetch.
/// The value can be specified with unit suffixes: B (bytes), KB (kilobytes), MB (megabytes), GB (gigabytes).
/// If no unit is specified, the value is treated as bytes.
/// Default value is 20MB if not specified.
/// </summary>
public const string MaxBytesPerFile = "adbc.databricks.cloudfetch.max_bytes_per_file";
/// <summary>
/// Maximum number of retry attempts for CloudFetch downloads.
/// Default value is -1 (not set, use timeout only).
/// When set to a non-negative value, the retry loop exits if either this count or the timeout is reached.
/// </summary>
public const string CloudFetchMaxRetries = "adbc.databricks.cloudfetch.max_retries";
/// <summary>
/// Maximum time in seconds to retry failed CloudFetch downloads.
/// Uses exponential backoff with jitter within this time budget.
/// Default value is 300 (5 minutes) if not specified.
/// </summary>
public const string CloudFetchRetryTimeoutSeconds = "adbc.databricks.cloudfetch.retry_timeout_seconds";
/// <summary>
/// Delay in milliseconds between CloudFetch retry attempts.
/// Default value is 500ms if not specified.
/// </summary>
public const string CloudFetchRetryDelayMs = "adbc.databricks.cloudfetch.retry_delay_ms";
/// <summary>
/// Timeout in minutes for CloudFetch HTTP operations.
/// Default value is 5 minutes if not specified.
/// </summary>
public const string CloudFetchTimeoutMinutes = "adbc.databricks.cloudfetch.timeout_minutes";
/// <summary>
/// Buffer time in seconds before URL expiration to trigger refresh.
/// Default value is 60 seconds if not specified.
/// </summary>
public const string CloudFetchUrlExpirationBufferSeconds = "adbc.databricks.cloudfetch.url_expiration_buffer_seconds";
/// <summary>
/// Maximum number of URL refresh attempts for CloudFetch downloads.
/// Default value is 3 if not specified.
/// </summary>
public const string CloudFetchMaxUrlRefreshAttempts = "adbc.databricks.cloudfetch.max_url_refresh_attempts";
/// <summary>
/// Whether to enable the use of direct results when executing queries.
/// Default value is true if not specified.
/// </summary>
public const string EnableDirectResults = "adbc.databricks.enable_direct_results";
/// <summary>
/// Whether to apply service side properties (SSP) with queries. If false, SSP will be applied
/// by setting the Thrift configuration when the session is opened.
/// Default value is false if not specified.
/// </summary>
public const string ApplySSPWithQueries = "adbc.databricks.apply_ssp_with_queries";
/// <summary>
/// Prefix for server-side properties. Properties with this prefix will be passed to the server
/// by executing a "set key=value" query when opening a session.
/// For example, a property with key "adbc.databricks.ssp_use_cached_result"
/// and value "true" will result in executing "set use_cached_result=true" on the server.
/// </summary>
public const string ServerSidePropertyPrefix = "adbc.databricks.ssp_";
/// <summary>
/// Query tags to be sent with the statement execution.
/// Tags are sent to the server in the confOverlay and apply to the statement only.
/// Format: comma-separated key:value pairs, e.g., "team:analytics,app:myapp"
/// </summary>
public const string QueryTags = "adbc.databricks.query_tags";
/// <summary>
/// Controls whether to retry requests that receive retryable responses (408, 502, 503, 504).
/// Default value is true (enabled). Set to false to disable retry behavior.
/// </summary>
public const string TemporarilyUnavailableRetry = "adbc.spark.temporarily_unavailable_retry";
/// <summary>
/// Maximum total time in seconds to retry retryable responses (408, 502, 503, 504) before failing.
/// Default value is 900 seconds (15 minutes). Set to 0 to retry indefinitely.
/// </summary>
public const string TemporarilyUnavailableRetryTimeout = "adbc.spark.temporarily_unavailable_retry_timeout";
/// <summary>
/// Controls whether to retry requests that receive HTTP 429 (TooManyRequests) response.
/// Default value is true. Set to false to disable rate limit retry behavior.
/// </summary>
public const string RateLimitRetry = "adbc.databricks.rate_limit_retry";
/// <summary>
/// The number of seconds that the connector waits before stopping an attempt to retry an operation
/// when the operation receives an HTTP 429 response.
/// Default value is 120 seconds. Set to 0 to retry indefinitely.
/// </summary>
public const string RateLimitRetryTimeout = "adbc.databricks.rate_limit_retry_timeout";
/// <summary>
/// Controls whether to retry requests that fail due to transport-level errors
/// (connection refused, TCP reset, DNS failure, dead connections).
/// Default value is true (enabled). Set to false to disable transport error retry behavior.
/// </summary>
public const string TransportErrorRetry = "adbc.databricks.transport_error_retry";
/// <summary>
/// Per-request timeout in seconds to detect dead connections (e.g., half-open TCP).
/// When a connection hangs without sending data, this timeout fires to cancel the request
/// and allow retry. Default value is 900 seconds (15 minutes), matching JDBC socketTimeout.
/// Set to 0 to disable the per-request timeout.
/// </summary>
public const string HttpRequestTimeout = "adbc.databricks.http_request_timeout";
/// <summary>
/// Maximum number of parallel downloads for CloudFetch operations.
/// Default value is 3 if not specified.
/// </summary>
public const string CloudFetchParallelDownloads = "adbc.databricks.cloudfetch.parallel_downloads";
/// <summary>
/// Number of files to prefetch in CloudFetch operations.
/// Default value is 2 if not specified.
/// </summary>
public const string CloudFetchPrefetchCount = "adbc.databricks.cloudfetch.prefetch_count";
/// <summary>
/// Maximum memory buffer size in MB for CloudFetch prefetched files.
/// Default value is 200MB if not specified.
/// </summary>
public const string CloudFetchMemoryBufferSize = "adbc.databricks.cloudfetch.memory_buffer_size_mb";
/// <summary>
/// Whether CloudFetch prefetch functionality is enabled.
/// Default value is true if not specified.
/// </summary>
public const string CloudFetchPrefetchEnabled = "adbc.databricks.cloudfetch.prefetch_enabled";
/// <summary>
/// Maximum bytes per fetch request when retrieving query results from servers.
/// The value can be specified with unit suffixes: B (bytes), KB (kilobytes), MB (megabytes), GB (gigabytes).
/// If no unit is specified, the value is treated as bytes.
/// Default value is 400MB if not specified.
/// </summary>
public const string MaxBytesPerFetchRequest = "adbc.databricks.max_bytes_per_fetch_request";
/// <summary>
/// The OAuth grant type to use for authentication.
/// Supported values:
/// - "access_token": Use a pre-generated Databricks personal access token (default)
/// - "client_credentials": Use OAuth client credentials flow for m2m authentication
/// When using "client_credentials", the driver will automatically handle token acquisition,
/// renewal, and authentication with the Databricks service.
/// </summary>
public const string OAuthGrantType = "adbc.databricks.oauth.grant_type";
/// <summary>
/// The OAuth client ID for client credentials flow.
/// Required when grant_type is "client_credentials".
/// This is the client ID you obtained when registering your application with Databricks.
/// </summary>
public const string OAuthClientId = "adbc.databricks.oauth.client_id";
/// <summary>
/// The OAuth client secret for client credentials flow.
/// Required when grant_type is "client_credentials".
/// This is the client secret you obtained when registering your application with Databricks.
/// </summary>
public const string OAuthClientSecret = "adbc.databricks.oauth.client_secret";
/// <summary>
/// The OAuth scope for client credentials flow.
/// Optional when grant_type is "client_credentials".
/// Default value is "sql" if not specified.
/// </summary>
public const string OAuthScope = "adbc.databricks.oauth.scope";
/// <summary>
/// Whether to use multiple catalogs.
/// Default value is true if not specified.
/// </summary>
public const string EnableMultipleCatalogSupport = "adbc.databricks.enable_multiple_catalog_support";
/// <summary>
/// Whether to enable primary key foreign key metadata call.
/// Default value is true if not specified.
/// </summary>
public const string EnablePKFK = "adbc.databricks.enable_pk_fk";
/// <summary>
/// Whether to use query DESC TABLE EXTENDED to get extended column metadata when the current DBR supports it
/// Default value is true if not specified.
/// </summary>
public const string UseDescTableExtended = "adbc.databricks.use_desc_table_extended";
/// <summary>
/// Whether to enable RunAsync flag in Thrift operation
/// Default value is true if not specified.
/// </summary>
public const string EnableRunAsyncInThriftOp = "adbc.databricks.enable_run_async_thrift";
/// <summary>
/// Whether to propagate trace parent headers in HTTP requests.
/// Default value is true if not specified.
/// When enabled, the driver will add W3C Trace Context headers to all HTTP requests.
/// </summary>
public const string TracePropagationEnabled = "adbc.databricks.trace_propagation.enabled";
/// <summary>
/// The name of the HTTP header to use for trace parent propagation.
/// Default value is "traceparent" (W3C standard) if not specified.
/// This allows customization for systems that use different header names.
/// </summary>
public const string TraceParentHeaderName = "adbc.databricks.trace_propagation.header_name";
/// <summary>
/// Whether to include trace state header in HTTP requests.
/// Default value is false if not specified.
/// When enabled, the driver will also propagate the tracestate header if available.
/// </summary>
public const string TraceStateEnabled = "adbc.databricks.trace_propagation.state_enabled";
/// <summary>
/// The minutes before token expiration when we should start renewing the token.
/// Default value is 0 (disabled) if not specified.
/// </summary>
public const string TokenRenewLimit = "adbc.databricks.token_renew_limit";
/// <summary>
/// The client ID of the service principal when using workload identity federation.
/// Default value is empty if not specified.
/// </summary>
public const string IdentityFederationClientId = "adbc.databricks.identity_federation_client_id";
/// <summary>
/// Controls whether driver configuration takes precedence over passed-in properties during configuration merging.
/// When "true": Environment/driver config properties override passed-in constructor properties.
/// When "false" (default): Passed-in constructor properties override environment/driver config properties.
/// This property can be set either in the environment configuration file or in passed-in properties.
/// When set in both places, the value in passed-in properties takes precedence.
/// Default value is false if not specified.
/// </summary>
public const string DriverConfigTakePrecedence = "adbc.databricks.driver_config_take_precedence";
/// <summary>
/// The interval in seconds for heartbeat polling during long-running operations.
/// This prevents queries from timing out by periodically checking operation status.
/// Default value is 60 seconds if not specified.
/// Must be a positive integer value.
/// </summary>
public const string FetchHeartbeatInterval = "adbc.databricks.fetch_heartbeat_interval";
/// <summary>
/// The timeout in seconds for operation status polling requests.
/// This controls how long to wait for each individual polling request to complete.
/// Default value is 30 seconds if not specified.
/// Must be a positive integer value.
/// </summary>
public const string OperationStatusRequestTimeout = "adbc.databricks.operation_status_request_timeout";
// Statement Execution API configuration parameters
/// <summary>
/// The warehouse ID to use for query execution.
/// Required when using Statement Execution REST API (Protocol = "rest").
/// Optional for Thrift protocol.
/// </summary>
public const string WarehouseId = "adbc.databricks.warehouse_id";
/// <summary>
/// The protocol to use for statement execution.
/// Supported values:
/// - "thrift": Use Thrift/HiveServer2 protocol (default)
/// - "rest": Use Statement Execution REST API
/// Default value is "thrift" if not specified.
/// </summary>
public const string Protocol = "adbc.databricks.protocol";
/// <summary>
/// Result disposition for Statement Execution API.
/// Supported values:
/// - "inline": Results returned directly in response (≤25 MiB)
/// - "external_links": Results via presigned URLs (≤100 GiB)
/// - "inline_or_external_links": Hybrid mode - server decides based on size (default, recommended)
/// Default value is "inline_or_external_links" if not specified.
/// Only applicable when Protocol is "rest".
/// </summary>
public const string ResultDisposition = "adbc.databricks.rest.result_disposition";
/// <summary>
/// Result format for Statement Execution API.
/// Supported values:
/// - "arrow_stream": Apache Arrow IPC format (default, recommended)
/// - "json_array": JSON array format
/// - "csv": CSV format
/// Default value is "arrow_stream" if not specified.
/// Only applicable when Protocol is "rest".
/// </summary>
public const string ResultFormat = "adbc.databricks.rest.result_format";
/// <summary>
/// Result compression codec for Statement Execution API.
/// Supported values:
/// - "lz4": LZ4 compression (default for external_links)
/// - "gzip": GZIP compression
/// - "none": No compression (default for inline)
/// Only applicable when Protocol is "rest".
/// </summary>
public const string ResultCompression = "adbc.databricks.rest.result_compression";
/// <summary>
/// Wait timeout for statement execution in seconds.
/// - 0: Async mode, return immediately
/// - 5-50: Sync mode up to timeout
/// Default: 10 seconds
/// Note: When enable_direct_results=true, this parameter is not set (server waits until complete)
/// Only applicable when Protocol is "rest".
/// </summary>
public const string WaitTimeout = "adbc.databricks.rest.wait_timeout";
/// <summary>
/// Statement polling interval in milliseconds for async execution.
/// Default: 1000ms (1 second)
/// Only applicable when Protocol is "rest".
/// </summary>
public const string PollingInterval = "adbc.databricks.rest.polling_interval_ms";
/// <summary>
/// Enable session management for Statement Execution API.
/// When true, creates and reuses session across statements in a connection.
/// When false, each statement executes without session context.
/// Default: true
/// Only applicable when Protocol is "rest".
/// </summary>
public const string EnableSessionManagement = "adbc.databricks.rest.enable_session_management";
/// <summary>
/// Whether to enable the feature flag cache for fetching remote configuration from the server.
/// When enabled, the driver fetches feature flags from the Databricks server and merges them with local properties.
/// Default value is false if not specified.
/// </summary>
public const string FeatureFlagCacheEnabled = "adbc.databricks.feature_flag_cache_enabled";
/// <summary>
/// Timeout in seconds for feature flag API requests.
/// Default value is 10 seconds if not specified.
/// </summary>
public const string FeatureFlagTimeoutSeconds = "adbc.databricks.feature_flag_timeout_seconds";
/// <summary>
/// Sliding expiration TTL in seconds for feature flag cache entries.
/// When a cache entry is read, its expiration is extended by this duration.
/// Default value is 900 seconds (15 minutes) if not specified.
/// </summary>
public const string FeatureFlagCacheTtlSeconds = "adbc.databricks.feature_flag_cache_ttl_seconds";
/// <summary>
/// Whether to return complex types (ARRAY, MAP, STRUCT) as native Arrow types.
/// When false (default): complex types are serialized to JSON strings, matching legacy behavior.
/// When true: complex types are returned as native Arrow types (ListType, MapType, StructType).
/// This applies to both Thrift and SEA protocols, providing consistent behavior across protocols.
/// Default value is false if not specified.
/// </summary>
public const string EnableComplexDatatypeSupport = "adbc.databricks.enable_complex_datatype_support";
}
/// <summary>
/// Constants used for default parameter values.
/// </summary>
public class DatabricksConstants
{
/// <summary>
/// HTTP header for passing the Databricks organization ID on REST requests.
/// </summary>
public const string OrgIdHeader = "x-databricks-org-id";
/// <summary>
/// Default heartbeat interval in seconds for long-running operations.
/// </summary>
public const int DefaultOperationStatusPollingIntervalSeconds = 60;
/// <summary>
/// Default timeout in seconds for operation status polling requests.
/// </summary>
public const int DefaultOperationStatusRequestTimeoutSeconds = 30;
/// <summary>
/// Default async execution poll interval in milliseconds.
/// </summary>
public const int DefaultAsyncExecPollIntervalMs = 100;
/// <summary>
/// Default timeout in seconds for retrying temporarily unavailable responses (408, 502, 503, 504).
/// </summary>
public const int DefaultTemporarilyUnavailableRetryTimeout = 900; // 15 minutes
/// <summary>
/// Default timeout in seconds for retrying rate-limited responses (429).
/// </summary>
public const int DefaultRateLimitRetryTimeout = 120; // 2 minutes
/// <summary>
/// Default per-request timeout in seconds to detect dead connections.
/// Matches JDBC driver's socketTimeout=900.
/// </summary>
public const int DefaultHttpRequestTimeout = 900; // 15 minutes
/// <summary>
/// Default timeout in minutes for CloudFetch HTTP operations.
/// </summary>
public const int DefaultCloudFetchTimeoutMinutes = 5;
/// <summary>
/// OAuth grant type constants
/// </summary>
public static class OAuthGrantTypes
{
/// <summary>
/// Use a pre-generated Databricks personal access token for authentication.
/// When using this grant type, you must provide the token via the
/// adbc.spark.oauth.access_token parameter.
/// </summary>
public const string AccessToken = "access_token";
/// <summary>
/// Use OAuth client credentials flow for m2m authentication.
/// When using this grant type, you must provide:
/// - adbc.databricks.oauth.client_id: The OAuth client ID
/// - adbc.databricks.oauth.client_secret: The OAuth client secret
/// The driver will automatically handle token acquisition, renewal, and
/// authentication with the Databricks service.
/// </summary>
public const string ClientCredentials = "client_credentials";
}
}
}