From 619d06eba9835d7ff42c6896fdf967255074cc28 Mon Sep 17 00:00:00 2001 From: ryanbordo Date: Sun, 11 Jan 2026 01:58:53 -0800 Subject: [PATCH 1/6] add pyhive connection socket timeout --- python/pyhive/hive.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/python/pyhive/hive.py b/python/pyhive/hive.py index d6f8080f2da..bf14ade7cef 100644 --- a/python/pyhive/hive.py +++ b/python/pyhive/hive.py @@ -160,7 +160,8 @@ def __init__( check_hostname=None, ssl_cert=None, thrift_transport=None, - ssl_context=None + ssl_context=None, + socket_timeout=None ): """Connect to HiveServer2 @@ -175,6 +176,7 @@ def __init__( Incompatible with host, port, auth, kerberos_service_name, and password. :param ssl_context: A custom SSL context to use for HTTPS connections. If provided, this overrides check_hostname and ssl_cert parameters. + :param socket_timeout: Millisecond timeout for the Thrift socket connections. The way to support LDAP and GSSAPI is originated from cloudera/Impyla: https://github.com/cloudera/impyla/blob/255b07ed973d47a3395214ed92d35ec0615ebf62 /impala/_thrift_api.py#L152-L160 @@ -236,6 +238,8 @@ def __init__( if auth is None: auth = 'NONE' socket = thrift.transport.TSocket.TSocket(host, port) + if socket_timeout: + socket.setTimeout(socket_timeout) if auth == 'NOSASL': # NOSASL corresponds to hive.server2.authentication=NOSASL in hive-site.xml self._transport = thrift.transport.TTransport.TBufferedTransport(socket) From e82306e4846b0e311666a647e568f80eec5d3e18 Mon Sep 17 00:00:00 2001 From: ryanbordo Date: Sun, 11 Jan 2026 02:17:29 -0800 Subject: [PATCH 2/6] add to http method as well --- python/pyhive/hive.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/python/pyhive/hive.py b/python/pyhive/hive.py index bf14ade7cef..051be949291 100644 --- a/python/pyhive/hive.py +++ b/python/pyhive/hive.py @@ -161,7 +161,7 @@ def __init__( ssl_cert=None, thrift_transport=None, ssl_context=None, - socket_timeout=None + 
connection_timeout=None, ): """Connect to HiveServer2 @@ -176,7 +176,7 @@ def __init__( Incompatible with host, port, auth, kerberos_service_name, and password. :param ssl_context: A custom SSL context to use for HTTPS connections. If provided, this overrides check_hostname and ssl_cert parameters. - :param socket_timeout: Millisecond timeout for the Thrift socket connections. + :param connection_timeout: Millisecond timeout for Thrift connections. The way to support LDAP and GSSAPI is originated from cloudera/Impyla: https://github.com/cloudera/impyla/blob/255b07ed973d47a3395214ed92d35ec0615ebf62 /impala/_thrift_api.py#L152-L160 @@ -195,6 +195,8 @@ def __init__( ), ssl_context=ssl_context, ) + if connection_timeout: + thrift_transport.setTimeout(connection_timeout) if auth in ("BASIC", "NOSASL", "NONE", None): # Always needs the Authorization header @@ -238,8 +240,8 @@ def __init__( if auth is None: auth = 'NONE' socket = thrift.transport.TSocket.TSocket(host, port) - if socket_timeout: - socket.setTimeout(socket_timeout) + if connection_timeout: + socket.setTimeout(connection_timeout) if auth == 'NOSASL': # NOSASL corresponds to hive.server2.authentication=NOSASL in hive-site.xml self._transport = thrift.transport.TTransport.TBufferedTransport(socket) From cb26cb8f127f6addfdd94b8ca052884686262e92 Mon Sep 17 00:00:00 2001 From: ryanbordo Date: Sun, 11 Jan 2026 02:38:35 -0800 Subject: [PATCH 3/6] add test --- python/pyhive/tests/test_hive.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/python/pyhive/tests/test_hive.py b/python/pyhive/tests/test_hive.py index c2a020e17fa..ccea22f6ced 100644 --- a/python/pyhive/tests/test_hive.py +++ b/python/pyhive/tests/test_hive.py @@ -259,6 +259,17 @@ def test_basic_ssl_context(self): cursor.execute('SELECT 1 FROM one_row') self.assertEqual(cursor.fetchall(), [(1,)]) + def test_connection_timeout(self): + """Test that a connection timeout is set without error.""" + with contextlib.closing(hive.connect( + 
host=_HOST, + port=10000, + connection_timeout=10 * 1000 + )) as connection: + with contextlib.closing(connection.cursor()) as cursor: + # Use the same query pattern as other tests + cursor.execute('SELECT 1 FROM one_row') + self.assertEqual(cursor.fetchall(), [(1,)]) def _restart_hs2(): subprocess.check_call(['sudo', 'service', 'hive-server2', 'restart']) From 5862230c7e5d3038380ed85b94f8f10810fbeda2 Mon Sep 17 00:00:00 2001 From: Ryan Bordo Date: Thu, 5 Feb 2026 15:58:13 -0800 Subject: [PATCH 4/6] use None check, rather than truthy for 0 case --- python/pyhive/hive.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/pyhive/hive.py b/python/pyhive/hive.py index 051be949291..e755fb7e553 100644 --- a/python/pyhive/hive.py +++ b/python/pyhive/hive.py @@ -195,7 +195,7 @@ def __init__( ), ssl_context=ssl_context, ) - if connection_timeout: + if connection_timeout is not None: thrift_transport.setTimeout(connection_timeout) if auth in ("BASIC", "NOSASL", "NONE", None): @@ -240,7 +240,7 @@ def __init__( if auth is None: auth = 'NONE' socket = thrift.transport.TSocket.TSocket(host, port) - if connection_timeout: + if connection_timeout is not None: socket.setTimeout(connection_timeout) if auth == 'NOSASL': # NOSASL corresponds to hive.server2.authentication=NOSASL in hive-site.xml From c0e5ba902dc67e1939927e53417d45790b618db7 Mon Sep 17 00:00:00 2001 From: Ryan Bordo Date: Thu, 5 Feb 2026 16:01:09 -0800 Subject: [PATCH 5/6] add doc info --- python/pyhive/hive.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyhive/hive.py b/python/pyhive/hive.py index e755fb7e553..dc25d1bc3eb 100644 --- a/python/pyhive/hive.py +++ b/python/pyhive/hive.py @@ -176,7 +176,7 @@ def __init__( Incompatible with host, port, auth, kerberos_service_name, and password. :param ssl_context: A custom SSL context to use for HTTPS connections. If provided, this overrides check_hostname and ssl_cert parameters. 
- :param connection_timeout: Millisecond timeout for Thrift connections. + :param connection_timeout: Millisecond timeout for Thrift connections. Ignored when a custom thrift_transport is supplied. The way to support LDAP and GSSAPI is originated from cloudera/Impyla: https://github.com/cloudera/impyla/blob/255b07ed973d47a3395214ed92d35ec0615ebf62 /impala/_thrift_api.py#L152-L160 From 6b349b70c9df00d5ce98b1f1069f26dcdde7828b Mon Sep 17 00:00:00 2001 From: ryanbordo Date: Sat, 21 Feb 2026 14:59:04 -0800 Subject: [PATCH 6/6] add more exhaustive tests --- python/.gitignore | 1 + python/pyhive/tests/test_hive.py | 32 ++++++++++++++++++++++++++------ 2 files changed, 27 insertions(+), 6 deletions(-) diff --git a/python/.gitignore b/python/.gitignore index a473be421e7..1b1a821accf 100644 --- a/python/.gitignore +++ b/python/.gitignore @@ -16,4 +16,5 @@ cover/ *.iml /scripts/.thrift_gen venv/ +.venv/ .envrc diff --git a/python/pyhive/tests/test_hive.py b/python/pyhive/tests/test_hive.py index ccea22f6ced..4c6f6094eca 100644 --- a/python/pyhive/tests/test_hive.py +++ b/python/pyhive/tests/test_hive.py @@ -259,17 +259,37 @@ def test_basic_ssl_context(self): cursor.execute('SELECT 1 FROM one_row') self.assertEqual(cursor.fetchall(), [(1,)]) - def test_connection_timeout(self): + def test_connection_timeout_sasl(self): """Test that a connection timeout is set without error.""" with contextlib.closing(hive.connect( host=_HOST, port=10000, - connection_timeout=10 * 1000 + connection_timeout=2_000, )) as connection: - with contextlib.closing(connection.cursor()) as cursor: - # Use the same query pattern as other tests - cursor.execute('SELECT 1 FROM one_row') - self.assertEqual(cursor.fetchall(), [(1,)]) + # thrift converts milliseconds to seconds + assert connection._transport._trans._timeout == 2 + + def test_connection_timeout_nosasl(self): + """Test that a connection timeout is set without error.""" + with contextlib.closing(hive.connect( + host=_HOST, + port=10000, + 
connection_timeout=2_000, + auth='NOSASL', + )) as connection: + # thrift converts milliseconds to seconds + assert connection._transport._TBufferedTransport__trans._timeout == 2 + + def test_connection_timeout_http(self): + """Test that a connection timeout is set without error.""" + with contextlib.closing(hive.connect( + host=_HOST, + port=10000, + connection_timeout=2_000, + scheme='http', + )) as connection: + # thrift converts milliseconds to seconds + assert connection._transport._THttpClient__timeout == 2 def _restart_hs2(): subprocess.check_call(['sudo', 'service', 'hive-server2', 'restart'])