
Recursion error when loading a table in Databricks notebook #715

@ram-seek

Description


When trying to do something like the following in Databricks (runtime 16.4, which uses Spark 3.5.2):

from typedspark import Catalogs, create_schema
db = Catalogs(spark)
tbl, tbl_schema = db.tes_schema.tbl.load()

I'm running into a strange recursion error. I'm really not sure where to start or whether this is a known issue. Any pointers would really help.

NameError: name 'AnalysisException' is not defined
RecursionError: maximum recursion depth exceeded

During handling of the above exception, another exception occurred:
NameError                                 Traceback (most recent call last)
File /databricks/spark/python/pyspark/sql/dataframe.py:650, in DataFrame.schema(self)
    649 try:
--> 650     return cast(StructType, _parse_datatype_json_string(self._jdf.schema().json()))
    651 except AnalysisException as e:
File /databricks/spark/python/pyspark/instrumentation_utils.py:42, in _wrap_function.<locals>.wrapper(*args, **kwargs)
     40 if hasattr(_local, "logging") and _local.logging:
     41     # no need to log since this should be internal call.
---> 42     return func(*args, **kwargs)
     43 _local.logging = True
File /databricks/spark/python/pyspark/sql/dataframe.py:3785, in DataFrame.__getattr__(self, name)
   3753 """Returns the :class:`Column` denoted by ``name``.
   3754 
   3755 .. versionadded:: 1.3.0
   (...)
   3783 +---+
   3784 """
-> 3785 if name not in self.columns:
   3786     raise PySparkAttributeError(
   3787         errorClass="ATTRIBUTE_NOT_SUPPORTED", messageParameters={"attr_name": name}
   3788     )
File /databricks/spark/python/pyspark/instrumentation_utils.py:75, in _wrap_property.<locals>.wrapper(self)
     73 if hasattr(_local, "logging") and _local.logging:
     74     # no need to log since this should be internal call.
---> 75     return prop.fget(self)
     76 _local.logging = True
File /databricks/spark/python/pyspark/sql/dataframe.py:2605, in DataFrame.columns(self)
   2532 """
   2533 Retrieves the names of all columns in the :class:`DataFrame` as a list.
   2534 
   (...)
   2603 False
   2604 """
-> 2605 return [f.name for f in self.schema.fields]
File /databricks/spark/python/pyspark/instrumentation_utils.py:120, in _wrap_cached_property.<locals>.wrapper.__get__(self, instance, owner)
    118 if hasattr(_local, "logging") and _local.logging:
    119     # no need to log since this should be internal call.
--> 120     return self._cached_property.__get__(instance, owner)
    122 _local.logging = True
File /usr/lib/python3.12/functools.py:995, in cached_property.__get__(self, instance, owner)
    994 if val is _NOT_FOUND:
--> 995     val = self.func(instance)
    996     try:
File /databricks/spark/python/pyspark/sql/dataframe.py:651, in DataFrame.schema(self)
    650     return cast(StructType, _parse_datatype_json_string(self._jdf.schema().json()))
--> 651 except AnalysisException as e:
    652     raise e
NameError: name 'AnalysisException' is not defined

During handling of the above exception, another exception occurred:
NameError                                 Traceback (most recent call last)
File <command-8218834077646643>, line 1
----> 1 tbl,tbl_schema = db.tes_schema.tbl.load()

File /local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.12/site-packages/typedspark/_utils/databases.py:130, in Table.load(self)
    128 def load(self) -> Tuple[DataSet[T], T]:
    129     """Loads the table as a DataSet[T] and returns the schema."""
--> 130     return load_table(  # type: ignore
    131         self._spark,
    132         self.str,
    133         to_camel_case(self._table_name),
    134     )
File /local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.12/site-packages/typedspark/_utils/load_table.py:87, in load_table(spark, table_name, schema_name)
     76 """This function loads a ``DataSet``, along with its inferred ``Schema``, in a
     77 notebook.
     78 
   (...)
     84     df, Person = load_table(spark, "path.to.table")
     85 """
     86 dataframe = spark.table(table_name)
---> 87 return create_schema(dataframe, schema_name)
File /local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.12/site-packages/typedspark/_utils/load_table.py:68, in create_schema(dataframe, schema_name)
     66 dataframe = _replace_illegal_column_names(dataframe)
     67 schema = create_schema_from_structtype(dataframe.schema, schema_name)
---> 68 dataset = DataSet[schema](dataframe)  # type: ignore
     69 schema = register_schema_to_dataset(dataset, schema)
     70 return dataset, schema
File /local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.12/site-packages/typedspark/_core/dataset.py:197, in DataSet.__new__(cls, dataframe)
    195 if hasattr(cls, "_schema_annotations"):
    196     dataframe._schema_annotations = cls._schema_annotations  # type: ignore
--> 197     dataframe._validate_schema()
    198     dataframe._add_schema_metadata()
    200 return dataframe
File /local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.12/site-packages/typedspark/_core/dataset.py:219, in DataSet._validate_schema(self)
    215 def _validate_schema(self) -> None:
    216     """Validates the schema of the ``DataSet`` against the schema annotations."""
    217     validate_schema(
    218         self._schema_annotations.get_structtype(),
--> 219         deepcopy(self.schema),
    220         self._schema_annotations.get_schema_name(),
    221     )
File /databricks/spark/python/pyspark/instrumentation_utils.py:125, in _wrap_cached_property.<locals>.wrapper.__get__(self, instance, owner)
    123 try:
    124     if logger is None:
--> 125         return self._cached_property.__get__(instance, owner)
    127     start = time.perf_counter()
    128     try:
File /usr/lib/python3.12/functools.py:995, in cached_property.__get__(self, instance, owner)
    993 val = cache.get(self.attrname, _NOT_FOUND)
    994 if val is _NOT_FOUND:
--> 995     val = self.func(instance)
    996     try:
    997         cache[self.attrname] = val
File /databricks/spark/python/pyspark/sql/dataframe.py:651, in DataFrame.schema(self)
    649 try:
    650     return cast(StructType, _parse_datatype_json_string(self._jdf.schema().json()))
--> 651 except AnalysisException as e:
    652     raise e
    653 except Exception as e:
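
For what it's worth, the traceback bottoms out in typedspark's _validate_schema(), which calls deepcopy(self.schema) on the underlying DataFrame. A minimal way to exercise the same calls outside of Catalogs would be something like the following sketch (the table name is copied from the failing cell and may need a catalog prefix; the schema-class name is just a placeholder):

from copy import deepcopy
from typedspark import create_schema

df = spark.table("tes_schema.tbl")      # same table as in the failing cell
print(deepcopy(df.schema))              # the step _validate_schema() performs on the DataFrame
ds, schema = create_schema(df, "Tbl")   # what Table.load() ends up calling via load_table()

If deepcopy(df.schema) alone already raises the same NameError / RecursionError, the problem would seem to sit in pyspark's DataFrame.schema on this runtime rather than in typedspark itself.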
