
Error with mosaic.enable_mosaic() when creating a Databricks DLT pipeline with Mosaic #538

@slothPete7773

Description


Describe the bug
The error was raised when I tried to start a DLT pipeline from a Databricks notebook, created just to start experimenting with DLT.
The primary library is Mosaic, which the documentation instructs to install before importing.
The code is roughly as follows:

%pip install databricks-mosaic
import mosaic as mos
mos.enable_mosaic(spark, dbutils) # Error line 
import dlt
from pyspark.sql.functions import *
from pyspark.sql.types import *

@dlt.table(comment="Testing a DLT table for area coverage")
def area():
    return spark.read.table("area")

The following error was raised:

java.lang.RuntimeException: Failed to execute python command for notebook '/Users/[email protected]/test DLT' with id RunnableCommandId(66333709513xxxxxxxx) and error AnsiResult(---------------------------------------------------------------------------
Py4JError                                 Traceback (most recent call last)
File <command--1>:3
      1 import mosaic as mos
----> 3 mos.enable_mosaic(spark, dbutils)
      5 import dlt
      6 # import pyspark.sql.functions as 

File /local_disk0/.ephemeral_nfs/envs/pythonEnv-e1a54f86-17c5-4856-abb7-5a8e08a9bfed/lib/python3.9/site-packages/mosaic/api/enable.py:47, in enable_mosaic(spark, dbutils)
     14 """
     15 Enable Mosaic functions.
     16 
   (...)
     44 
     45 """
     46 config.mosaic_spark = spark
---> 47 _ = MosaicLibraryHandler(config.mosaic_spark)
     48 config.mosaic_context = MosaicContext(config.mosaic_spark)
     50 # Register SQL functions

File /local_disk0/.ephemeral_nfs/envs/pythonEnv-e1a54f86-17c5-4856-abb7-5a8e08a9bfed/lib/python3.9/site-packages/mosaic/core/library_handler.py:18, in MosaicLibraryHandler.__init__(self, spark)
     16 self.spark = spark
     17 self.sc = spark.sparkContext
---> 18 self.sc.setLogLevel("info")
     19 log4jLogger = self.sc._jvm.org.apache.log4j
     20 LOGGER = log4jLogger.LogManager.getLogger(__class__.__name__)

File /databricks/spark/python/pyspark/context.py:575, in SparkContext.setLogLevel(self, logLevel)
    559 def setLogLevel(self, logLevel: str) -> None:
    560     """
    561     Control our logLevel. This overrides any user-defined log settings.
    562     Valid log levels include: ALL, DEBUG, ERROR, FATAL, INFO, OFF, TRACE, WARN
   (...)
    573     >>> sc.setLogLevel("WARN")  # doctest :+SKIP
    574     """
--> 575     self._jsc.setLogLevel(logLevel)

File /databricks/spark/python/lib/py4j-0.10.9.5-src.zip/py4j/java_gateway.py:1321, in JavaMember.__call__(self, *args)
   1315 command = proto.CALL_COMMAND_NAME +\
   1316     self.command_header +\
   1317     args_command +\
   1318     proto.END_COMMAND_PART
   1320 answer = self.gateway_client.send_command(command)
-> 1321 return_value = get_return_value(
   1322     answer, self.gateway_client, self.target_id, self.name)
   1324 for temp_arg in temp_args:
   1325     temp_arg._detach()

File /databricks/spark/python/pyspark/errors/exceptions.py:228, in capture_sql_exception.<locals>.deco(*a, **kw)
    226 def deco(*a: Any, **kw: Any) -> Any:
    227     try:
--> 228         return f(*a, **kw)
    229     except Py4JJavaError as e:
    230         converted = convert_exception(e.java_exception)

File /databricks/spark/python/lib/py4j-0.10.9.5-src.zip/py4j/protocol.py:330, in get_return_value(answer, gateway_client, target_id, name)
    326         raise Py4JJavaError(
    327             "An error occurred while calling {0}{1}{2}.\n".
    328             format(target_id, ".", name), value)
    329     else:
--> 330         raise Py4JError(
    331             "An error occurred while calling {0}{1}{2}. Trace:\n{3}\n".
    332             format(target_id, ".", name, value))
    333 else:
    334     raise Py4JError(
    335         "An error occurred while calling {0}{1}{2}".
    336         format(target_id, ".", name))

Py4JError: An error occurred while calling o425.setLogLevel. Trace:
py4j.security.Py4JSecurityException: Method public void org.apache.spark.api.java.JavaSparkContext.setLogLevel(java.lang.String) is not whitelisted on class class org.apache.spark.api.java.JavaSparkContext
	at py4j.security.WhitelistingPy4JSecurityManager.checkCall(WhitelistingPy4JSecurityManager.java:473)
	at py4j.Gateway.invoke(Gateway.java:305)
	at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
	at py4j.commands.CallCommand.execute(CallCommand.java:79)
	at py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:195)
	at py4j.ClientServerConnection.run(ClientServerConnection.java:115)
	at java.lang.Thread.run(Thread.java:750)

,None,Map(),Map(),List(),List(),Map())
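
The root cause appears to be DLT's Py4J whitelisting rather than Mosaic itself: on a DLT cluster, JavaSparkContext.setLogLevel is not a whitelisted method, so any call to it through Py4J is rejected. A minimal reproduction, independent of Mosaic (a sketch, assuming it runs inside a DLT pipeline notebook with a live spark session):

# Minimal reproduction sketch (assumption: executed in a DLT pipeline notebook,
# where the WhitelistingPy4JSecurityManager is active). This is the same call
# that MosaicLibraryHandler.__init__ makes at library_handler.py:18, and it
# raises the same Py4JSecurityException shown above.
spark.sparkContext.setLogLevel("info")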

The following is the DLT pipeline settings JSON:

{
    "id": "ce6e63a5-bef4-405c-90f9-02cd9b890b18",
    "pipeline_type": "WORKSPACE",
    "clusters": [
        {
            "label": "default",
            "node_type_id": "m5d.large",
            "driver_node_type_id": "m5d.large",
            "custom_tags": {
                "type": "test"
            },
            "num_workers": 1
        },
        {
            "label": "maintenance",
            "custom_tags": {
                "type": "test"
            }
        }
    ],
    "development": true,
    "continuous": false,
    "channel": "CURRENT",
    "photon": true,
    "libraries": [
        {
            "notebook": {
                "path": "/Users/[email protected]/test DLT"
            }
        }
    ],
    "name": "areaCov",
    "edition": "CORE",
    "catalog": "workspace",
    "target": "default",
    "data_sampling": false
}
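
As a possible workaround (an untested sketch, not a recommended fix): stubbing out SparkContext.setLogLevel before calling enable_mosaic() sidesteps the first blocked call. Note that MosaicLibraryHandler also accesses sc._jvm.org.apache.log4j right after setLogLevel, and that access may be blocked by the same security manager.

# Hedged workaround sketch (assumption: run in a DLT notebook with a live
# spark session). Replace setLogLevel with a no-op so enable_mosaic() never
# reaches the non-whitelisted JavaSparkContext.setLogLevel call.
from pyspark import SparkContext

SparkContext.setLogLevel = lambda self, level: None  # no-op on DLT clusters

import mosaic as mos
mos.enable_mosaic(spark, dbutils)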
