Nothing too major, but this should be considered for drop-in replacement of Spark and feature parity.
I have a bunch of unit tests for StructType parsing and noticed that they fail after switching to pysail.
def test_parentheses_in_field_names(self, spark: SparkSession):
with pytest.raises(ParseException):
StructType.fromDDL("struct<Tiefe(mm):double,Zeit(s):double>")
def test_slashes_in_field_names(self, spark: SparkSession):
with pytest.raises(ParseException):
StructType.fromDDL("struct<MGrad(Nm/Grad):double>")
def test_hyphenated_field_names(self, spark: SparkSession):
with pytest.raises(ParseException):
StructType.fromDDL("struct<de-DE:string,en-US:string>")
def test_nested_struct_with_special_chars(self, spark: SparkSession):
with pytest.raises(ParseException):
StructType.fromDDL("struct<Existing:boolean,Parameters:struct<Tiefe(mm):double,Result:string>>")
Instead of the expected ParseException, I get the following:
spark = <pyspark.sql.connect.session.SparkSession object at 0x7f58138a7860>
def test_hyphenated_field_names(self, spark: SparkSession):
with pytest.raises(ParseException):
> StructType.fromDDL("struct<de-DE:string,en-US:string>")
sdl/tests/unit/utils/test_schema.py:192:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
/usr/local/lib/python3.12/site-packages/pyspark/sql/types.py:193: in fromDDL
return _parse_datatype_string(ddl)
^^^^^^^^^^^^^^^^^^^^^^^^^^^
/usr/local/lib/python3.12/site-packages/pyspark/sql/types.py:1941: in _parse_datatype_string
return SparkSession.active()._parse_ddl(s)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
/usr/local/lib/python3.12/site-packages/pyspark/sql/connect/session.py:1141: in _parse_ddl
dt = self._client._analyze(method="ddl_parse", ddl_string=ddl).parsed
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
/usr/local/lib/python3.12/site-packages/pyspark/sql/connect/client/core.py:1343: in _analyze
self._handle_error(error)
/usr/local/lib/python3.12/site-packages/pyspark/sql/connect/client/core.py:1811: in _handle_error
self._handle_rpc_error(error)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <pyspark.sql.connect.client.core.SparkConnectClient object at 0x7f58138a7080>
rpc_error = <_InactiveRpcError of RPC that terminated with:
status = StatusCode.INTERNAL
details = "expected value at line 1 col...error_string = "UNKNOWN:Error received from peer {grpc_message:"expected value at line 1 column 1", grpc_status:13}"
>
def _handle_rpc_error(self, rpc_error: grpc.RpcError) -> NoReturn:
"""
Error handling helper for dealing with GRPC Errors. On the server side, certain
exceptions are enriched with additional RPC Status information. These are
unpacked in this function and put into the exception.
To avoid overloading the user with GRPC errors, this message explicitly
swallows the error context from the call. This GRPC Error is logged however,
and can be enabled.
Parameters
----------
rpc_error : grpc.RpcError
RPC Error containing the details of the exception.
Returns
-------
Throws the appropriate internal Python exception.
"""
logger.exception("GRPC Error received")
# We have to cast the value here because, a RpcError is a Call as well.
# https://grpc.github.io/grpc/python/grpc.html#grpc.UnaryUnaryMultiCallable.__call__
status = rpc_status.from_call(cast(grpc.Call, rpc_error))
if status:
for d in status.details:
if d.Is(error_details_pb2.ErrorInfo.DESCRIPTOR):
info = error_details_pb2.ErrorInfo()
d.Unpack(info)
logger.debug(f"Received ErrorInfo: {info}")
if info.metadata["errorClass"] == "INVALID_HANDLE.SESSION_CHANGED":
self._closed = True
> raise convert_exception(
info,
status.message,
self._fetch_enriched_error(info),
self._display_server_stack_trace(),
) from None
E pyspark.errors.exceptions.connect.IllegalArgumentException: expected value at line 1 column 1
/usr/local/lib/python3.12/site-packages/pyspark/sql/connect/client/core.py:1882: IllegalArgumentException
Nothing too major, but this should be considered for drop-in replacement of Spark and feature parity.
I have a bunch of unit tests for StructType parsing and noticed that they fail after switching to pysail.
Instead of the expected ParseException, I get the following: