Skip to content

Commit 67de1c4

Browse files
always set query_id
1 parent f40cd28 commit 67de1c4

File tree

6 files changed

+81
-4
lines changed

6 files changed

+81
-4
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ The supported method of passing ClickHouse server settings is to prefix such arg
2424
## UNRELEASED
2525
### Bug Fixes
2626
### Improvements
27+
- Always generate query_id from the client side as a UUID4 if it is not explicitly set. Closes [#596](https://github.com/ClickHouse/clickhouse-connect/issues/596)
2728

2829
## 0.10.0, 2025-11-14
2930

clickhouse_connect/common.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@ def _init_common(name: str, options: Sequence[Any], default: Any) -> None:
6969

7070

7171
_init_common('autogenerate_session_id', (True, False), True)
72+
_init_common('autogenerate_query_id', (True, False), True)
7273
_init_common('dict_parameter_format', ('json', 'map'), 'json')
7374
_init_common('invalid_setting_action', ('send', 'drop', 'error'), 'error')
7475
_init_common('max_connection_age', (), 10 * 60) # Max time in seconds to keep reusing a database TCP connection

clickhouse_connect/driver/asyncclient.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -518,7 +518,9 @@ def create_query_context(self,
518518
Creates or updates a reusable QueryContext object
519519
:param query: Query statement/format string
520520
:param parameters: Optional dictionary used to format the query
521-
:param settings: Optional dictionary of ClickHouse settings (key/string values)
521+
:param settings: Optional dictionary of ClickHouse settings (key/string values). This includes both
522+
server settings (e.g., max_threads, max_memory_usage) and HTTP interface parameters (e.g., query_id,
523+
session_id, database). All settings are sent as URL query parameters.
522524
:param query_formats: See QueryContext __init__ docstring
523525
:param column_formats: See QueryContext __init__ docstring
524526
:param encoding: See QueryContext __init__ docstring
@@ -543,7 +545,9 @@ def create_query_context(self,
543545
:param use_extended_dtypes: Only relevant to Pandas Dataframe queries. Use Pandas "missing types", such as
544546
pandas.NA and pandas.NaT for ClickHouse NULL values, as well as extended Pandas dtypes such as IntegerArray
545547
and StringArray. Defaulted to True for query_df methods
546-
:param transport_settings: Optional dictionary of transport level settings (HTTP headers, etc.)
548+
:param transport_settings: Optional dictionary of transport level settings sent as HTTP headers. Use this for
549+
custom headers (e.g., X-Workload) for load balancers or proxies. ClickHouse parameters like query_id should
550+
go in the settings dict, not here.
547551
:return: Reusable QueryContext
548552
"""
549553

clickhouse_connect/driver/client.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -473,7 +473,9 @@ def create_query_context(self,
473473
Creates or updates a reusable QueryContext object
474474
:param query: Query statement/format string
475475
:param parameters: Optional dictionary used to format the query
476-
:param settings: Optional dictionary of ClickHouse settings (key/string values)
476+
:param settings: Optional dictionary of ClickHouse settings (key/string values). This includes both
477+
server settings (e.g., max_threads, max_memory_usage) and HTTP interface parameters (e.g., query_id,
478+
session_id, database). All settings are sent as URL query parameters.
477479
:param query_formats: See QueryContext __init__ docstring
478480
:param column_formats: See QueryContext __init__ docstring
479481
:param encoding: See QueryContext __init__ docstring
@@ -500,7 +502,9 @@ def create_query_context(self,
500502
:param use_extended_dtypes: Only relevant to Pandas Dataframe queries. Use Pandas "missing types", such as
501503
pandas.NA and pandas.NaT for ClickHouse NULL values, as well as extended Pandas dtypes such as IntegerArray
502504
and StringArray. Defaulted to True for query_df methods
503-
:param transport_settings: Optional dictionary of transport level settings (HTTP headers, etc.)
505+
:param transport_settings: Optional dictionary of transport level settings sent as HTTP headers. Use this for
506+
custom headers (e.g., X-Workload) for load balancers or proxies. ClickHouse parameters like query_id should
507+
go in the settings dict, not here.
504508
:return: Reusable QueryContext
505509
"""
506510
resolved_utc_tz_aware = self.utc_tz_aware if utc_tz_aware is None else utc_tz_aware

clickhouse_connect/driver/httpclient.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,7 @@ def __init__(self,
7979
utc_tz_aware: Optional[bool] = None,
8080
show_clickhouse_errors: Optional[bool] = None,
8181
autogenerate_session_id: Optional[bool] = None,
82+
autogenerate_query_id: Optional[bool] = None,
8283
tls_mode: Optional[str] = None,
8384
proxy_path: str = '',
8485
form_encode_query_params: bool = False,
@@ -161,6 +162,11 @@ def __init__(self,
161162
elif 'session_id' not in ch_settings and _autogenerate_session_id:
162163
ch_settings['session_id'] = str(uuid.uuid4())
163164

165+
# Allow the constructor parameter to override the global autogenerate_query_id setting
166+
self._autogenerate_query_id = common.get_setting('autogenerate_query_id') \
167+
if autogenerate_query_id is None \
168+
else autogenerate_query_id
169+
164170
if coerce_bool(compress):
165171
compression = ','.join(available_compression)
166172
self.write_compression = available_compression[0]
@@ -191,6 +197,18 @@ def __init__(self,
191197
self._setting_status('http_headers_progress_interval_ms').is_writable:
192198
self._progress_interval = str(min(120000, max(10000, (send_receive_timeout - 5) * 1000)))
193199

200+
def _ensure_query_id(self, settings: Optional[dict]) -> Optional[dict]:
    """
    Return a settings dictionary that carries a query_id when client side generation is enabled
    :param settings: Optional dictionary of per-call settings. It is never modified in place
    :return: The settings argument itself when autogeneration is disabled or a "query_id" key is already
      present, otherwise a shallow copy of the settings with a new UUID4 string stored under "query_id"
    """
    if not self._autogenerate_query_id:
        return settings

    if settings is not None and "query_id" in settings:
        return settings

    # Build a new dict instead of inserting into the one we were handed.  If the caller reuses the same
    # settings dict for several calls, writing the generated id into it would make every later call
    # look like it had an explicit query_id and silently reuse the first generated value.
    return {**(settings or {}), "query_id": str(uuid.uuid4())}
211+
194212
def set_client_setting(self, key, value):
195213
str_value = self._validate_setting(key, value, common.get_setting('invalid_setting_action'))
196214
if str_value is not None:
@@ -215,6 +233,8 @@ def _prep_query(self, context: QueryContext):
215233
return final_query + fmt
216234

217235
def _query_with_context(self, context: QueryContext) -> QueryResult:
236+
context.settings = self._ensure_query_id(context.settings)
237+
218238
headers = {}
219239
params = {}
220240
if self.database:
@@ -307,6 +327,8 @@ def data_insert(self, context: InsertContext) -> QuerySummary:
307327
logger.debug('No data included in insert, skipping')
308328
return QuerySummary()
309329

330+
context.settings = self._ensure_query_id(context.settings)
331+
310332
def error_handler(resp: HTTPResponse):
311333
# If we actually had a local exception when building the insert, throw that instead
312334
if context.insert_exception:
@@ -342,6 +364,8 @@ def raw_insert(self, table: str = None,
342364
"""
343365
See BaseClient doc_string for this method
344366
"""
367+
settings = self._ensure_query_id(settings)
368+
345369
params = {}
346370
headers = {'Content-Type': 'application/octet-stream'}
347371
if compression:
@@ -385,6 +409,8 @@ def command(self,
385409
"""
386410
See BaseClient doc_string for this method
387411
"""
412+
settings = self._ensure_query_id(settings)
413+
388414
cmd, params = bind_query(cmd, parameters, self.server_tz)
389415
headers = {}
390416
payload = None

tests/integration_tests/test_client.py

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
from pathlib import Path
22
from time import sleep
33
from typing import Callable
4+
import uuid
45

56
import pytest
67

@@ -382,3 +383,43 @@ def test_role_setting_works(test_client: Client, test_config: TestConfig):
382383
)
383384
res = role_client.query('SELECT currentRoles()')
384385
assert res.result_rows == [([role_limited],)]
386+
387+
388+
# pylint: disable=protected-access
389+
def test_autogenerate_query_id(test_client: Client, test_table_engine: str, test_config: TestConfig):
    """
    Check that queries, commands and inserts report a UUID4 query_id when none is supplied, that an
    explicit query_id in the settings is preserved, and that the autogenerate_query_id constructor
    argument is stored on the client.
    """
    def _is_valid_uuid_v4_string(id_string: str) -> bool:
        # Return False for a missing or malformed id so the assertion fails cleanly instead of erroring
        try:
            return uuid.UUID(id_string).version == 4
        except (AttributeError, TypeError, ValueError):
            return False

    result = test_client.query("SELECT 1")
    assert _is_valid_uuid_v4_string(result.query_id)

    manual_query_id = "test_manual_query_id_12345"
    result = test_client.query("SELECT 2", settings={"query_id": manual_query_id})
    assert result.query_id == manual_query_id

    summary = test_client.command("DROP TABLE IF EXISTS does_not_exist")
    assert _is_valid_uuid_v4_string(summary.query_id())

    test_client.command("DROP TABLE IF EXISTS test_autogen_query_id")
    test_client.command(f"CREATE TABLE test_autogen_query_id (id UInt32) ENGINE {test_table_engine} ORDER BY id")
    try:
        summary = test_client.insert("test_autogen_query_id", [[1], [2], [3]], column_names=["id"])
        assert _is_valid_uuid_v4_string(summary.query_id())
    finally:
        # Drop the table even when the assertion fails so later runs start clean
        test_client.command("DROP TABLE IF EXISTS test_autogen_query_id")

    # Create client with autogenerate_query_id disabled
    client_no_autogen = create_client(
        host=test_config.host,
        port=test_config.port,
        username=test_config.username,
        password=test_config.password,
        autogenerate_query_id=False,
    )
    try:
        result = client_no_autogen.query("SELECT 4")
        # Even with client side generation disabled, the result is still expected to carry a query_id
        # (presumably assigned by the server).  This test cannot tell which side produced the id, so it
        # only checks that one is present and well formed.
        assert _is_valid_uuid_v4_string(result.query_id)

        # What can be verified directly is that the constructor argument was stored on the client
        assert client_no_autogen._autogenerate_query_id is False
    finally:
        # Release the extra client's resources regardless of the test outcome
        client_no_autogen.close()

0 commit comments

Comments
 (0)