When trying to do something like the below in Databricks (runtime 16.4, which uses Spark 3.5.2):

from typedspark import Catalogs, create_schema

db = Catalogs(spark)

I'm running into a strange recursion error. I'm really not sure where to start or whether this is an already known issue. Any pointers would really help.
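The call that actually blows up, per the traceback below, is the table load, i.e. continuing from the two lines above (tes_schema and tbl are just my schema and table names):

tbl, tbl_schema = db.tes_schema.tbl.load()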
NameError: name 'AnalysisException' is not defined
RecursionError: maximum recursion depth exceeded
During handling of the above exception, another exception occurred:
NameError Traceback (most recent call last)
File /databricks/spark/python/pyspark/sql/dataframe.py:650, in DataFrame.schema(self)
649 try:
--> 650 return cast(StructType, _parse_datatype_json_string(self._jdf.schema().json()))
651 except AnalysisException as e:
File /databricks/spark/python/pyspark/instrumentation_utils.py:42, in _wrap_function.<locals>.wrapper(*args, **kwargs)
40 if hasattr(_local, "logging") and _local.logging:
41 # no need to log since this should be internal call.
---> 42 return func(*args, **kwargs)
43 _local.logging = True
File /databricks/spark/python/pyspark/sql/dataframe.py:3785, in DataFrame.__getattr__(self, name)
3753 """Returns the :class:`Column` denoted by ``name``.
3754
3755 .. versionadded:: 1.3.0
(...)
3783 +---+
3784 """
-> 3785 if name not in self.columns:
3786 raise PySparkAttributeError(
3787 errorClass="ATTRIBUTE_NOT_SUPPORTED", messageParameters={"attr_name": name}
3788 )
File /databricks/spark/python/pyspark/instrumentation_utils.py:75, in _wrap_property.<locals>.wrapper(self)
73 if hasattr(_local, "logging") and _local.logging:
74 # no need to log since this should be internal call.
---> 75 return prop.fget(self)
76 _local.logging = True
File /databricks/spark/python/pyspark/sql/dataframe.py:2605, in DataFrame.columns(self)
2532 """
2533 Retrieves the names of all columns in the :class:`DataFrame` as a list.
2534
(...)
2603 False
2604 """
-> 2605 return [f.name for f in self.schema.fields]
File /databricks/spark/python/pyspark/instrumentation_utils.py:120, in _wrap_cached_property.<locals>.wrapper.__get__(self, instance, owner)
118 if hasattr(_local, "logging") and _local.logging:
119 # no need to log since this should be internal call.
--> 120 return self._cached_property.__get__(instance, owner)
122 _local.logging = True
File /usr/lib/python3.12/functools.py:995, in cached_property.__get__(self, instance, owner)
994 if val is _NOT_FOUND:
--> 995 val = self.func(instance)
996 try:
File /databricks/spark/python/pyspark/sql/dataframe.py:651, in DataFrame.schema(self)
650 return cast(StructType, _parse_datatype_json_string(self._jdf.schema().json()))
--> 651 except AnalysisException as e:
652 raise e
NameError: name 'AnalysisException' is not defined
During handling of the above exception, another exception occurred:
NameError Traceback (most recent call last)
File <command-8218834077646643>, line 1
----> 1 tbl,tbl_schema = db.tes_schema.tbl.load()
File /local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.12/site-packages/typedspark/_utils/databases.py:130, in Table.load(self)
128 def load(self) -> Tuple[DataSet[T], T]:
129 """Loads the table as a DataSet[T] and returns the schema."""
--> 130 return load_table( # type: ignore
131 self._spark,
132 self.str,
133 to_camel_case(self._table_name),
134 )
File /local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.12/site-packages/typedspark/_utils/load_table.py:87, in load_table(spark, table_name, schema_name)
76 """This function loads a ``DataSet``, along with its inferred ``Schema``, in a
77 notebook.
78
(...)
84 df, Person = load_table(spark, "path.to.table")
85 """
86 dataframe = spark.table(table_name)
---> 87 return create_schema(dataframe, schema_name)
File /local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.12/site-packages/typedspark/_utils/load_table.py:68, in create_schema(dataframe, schema_name)
66 dataframe = _replace_illegal_column_names(dataframe)
67 schema = create_schema_from_structtype(dataframe.schema, schema_name)
---> 68 dataset = DataSet[schema](dataframe) # type: ignore
69 schema = register_schema_to_dataset(dataset, schema)
70 return dataset, schema
File /local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.12/site-packages/typedspark/_core/dataset.py:197, in DataSet.__new__(cls, dataframe)
195 if hasattr(cls, "_schema_annotations"):
196 dataframe._schema_annotations = cls._schema_annotations # type: ignore
--> 197 dataframe._validate_schema()
198 dataframe._add_schema_metadata()
200 return dataframe
File /local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.12/site-packages/typedspark/_core/dataset.py:219, in DataSet._validate_schema(self)
215 def _validate_schema(self) -> None:
216 """Validates the schema of the ``DataSet`` against the schema annotations."""
217 validate_schema(
218 self._schema_annotations.get_structtype(),
--> 219 deepcopy(self.schema),
220 self._schema_annotations.get_schema_name(),
221 )
File /databricks/spark/python/pyspark/instrumentation_utils.py:125, in _wrap_cached_property.<locals>.wrapper.__get__(self, instance, owner)
123 try:
124 if logger is None:
--> 125 return self._cached_property.__get__(instance, owner)
127 start = time.perf_counter()
128 try:
File /usr/lib/python3.12/functools.py:995, in cached_property.__get__(self, instance, owner)
993 val = cache.get(self.attrname, _NOT_FOUND)
994 if val is _NOT_FOUND:
--> 995 val = self.func(instance)
996 try:
997 cache[self.attrname] = val
File /databricks/spark/python/pyspark/sql/dataframe.py:651, in DataFrame.schema(self)
649 try:
650 return cast(StructType, _parse_datatype_json_string(self._jdf.schema().json()))
--> 651 except AnalysisException as e:
652 raise e
653 except Exception as e:
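From reading the traceback, my rough mental model of the loop is below. This is only a toy approximation, not the actual pyspark code, and which attribute lookup first falls through to __getattr__ is my assumption: DataFrame.schema fails inside its try block, the except AnalysisException clause itself raises NameError because that name isn't defined at that point, __getattr__ then falls back to columns, columns reads schema again, and so on until the recursion limit is hit.

# Toy approximation of the cycle the traceback suggests -- NOT the real pyspark code.
# Assumption: some attribute lookup (modelled here as self._jdf) falls through to
# __getattr__, which touches columns -> schema, which looks up the missing attribute
# again, until Python raises RecursionError inside the try block; evaluating
# "except AnalysisException" then raises NameError because the name is not defined.

class FakeDataFrame:
    @property
    def schema(self):
        try:
            return self._jdf.schema()          # missing attribute -> __getattr__('_jdf')
        except AnalysisException:              # NameError: AnalysisException is not defined
            raise

    @property
    def columns(self):
        return [f.name for f in self.schema.fields]   # touches schema again

    def __getattr__(self, name):
        if name not in self.columns:           # columns -> schema -> __getattr__ -> ...
            raise AttributeError(name)


try:
    FakeDataFrame().schema
except Exception as exc:
    # Surfaces as NameError; the original RecursionError sits deeper in the
    # __context__ chain, matching the pairing in the traceback above.
    print(f"{type(exc).__name__}: {exc}")

Running this toy snippet on plain Python 3.12 gives the same pairing of a RecursionError and repeated "NameError: name 'AnalysisException' is not defined", which is why I suspect the NameError inside DataFrame.schema's except clause is what turns an ordinary failure into this recursion, but I may well be off.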