
Error: runtime error with pyspark quickstart notebook  #24

@silpara

Description


I installed the conda environment using conda env create -f envs/pyspark-330-delta-220 and tried running the notebook
01_quickstart.ipynb, but I get the following error:

---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
Cell In[3], line 1
----> 1 spark = configure_spark_with_delta_pip(builder).getOrCreate()

File ~\anaconda3\envs\pyspark-330-delta-220\lib\site-packages\pyspark\sql\session.py:269, in SparkSession.Builder.getOrCreate(self)
    267     sparkConf.set(key, value)
    268 # This SparkContext may be an existing one.
--> 269 sc = SparkContext.getOrCreate(sparkConf)
    270 # Do not update `SparkConf` for existing `SparkContext`, as it's shared
    271 # by all sessions.
    272 session = SparkSession(sc, options=self._options)

File ~\anaconda3\envs\pyspark-330-delta-220\lib\site-packages\pyspark\context.py:483, in SparkContext.getOrCreate(cls, conf)
    481 with SparkContext._lock:
    482     if SparkContext._active_spark_context is None:
--> 483         SparkContext(conf=conf or SparkConf())
    484     assert SparkContext._active_spark_context is not None
    485     return SparkContext._active_spark_context

File ~\anaconda3\envs\pyspark-330-delta-220\lib\site-packages\pyspark\context.py:195, in SparkContext.__init__(self, master, appName, sparkHome, pyFiles, environment, batchSize, serializer, conf, gateway, jsc, profiler_cls, udf_profiler_cls)
    189 if gateway is not None and gateway.gateway_parameters.auth_token is None:
    190     raise ValueError(
    191         "You are trying to pass an insecure Py4j gateway to Spark. This"
    192         " is not allowed as it is a security risk."
    193     )
--> 195 SparkContext._ensure_initialized(self, gateway=gateway, conf=conf)
    196 try:
    197     self._do_init(
    198         master,
    199         appName,
   (...)
    208         udf_profiler_cls,
    209     )

File ~\anaconda3\envs\pyspark-330-delta-220\lib\site-packages\pyspark\context.py:417, in SparkContext._ensure_initialized(cls, instance, gateway, conf)
    415 with SparkContext._lock:
    416     if not SparkContext._gateway:
--> 417         SparkContext._gateway = gateway or launch_gateway(conf)
    418         SparkContext._jvm = SparkContext._gateway.jvm
    420     if instance:

File ~\anaconda3\envs\pyspark-330-delta-220\lib\site-packages\pyspark\java_gateway.py:106, in launch_gateway(conf, popen_kwargs)
    103     time.sleep(0.1)
    105 if not os.path.isfile(conn_info_file):
--> 106     raise RuntimeError("Java gateway process exited before sending its port number")
    108 with open(conn_info_file, "rb") as info:
    109     gateway_port = read_int(info)

RuntimeError: Java gateway process exited before sending its port number
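
For context, the cell that fails (Cell In[3]) is the session setup. I don't have the exact notebook contents in front of me, but based on the traceback and the stock Delta Lake quickstart it presumably looks something like this sketch, with builder defined in an earlier cell:

```python
import pyspark
from delta import configure_spark_with_delta_pip

# Standard Delta Lake quickstart builder (assumed; likely defined in an earlier cell)
builder = (
    pyspark.sql.SparkSession.builder.appName("quickstart")
    .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension")
    .config(
        "spark.sql.catalog.spark_catalog",
        "org.apache.spark.sql.delta.catalog.DeltaCatalog",
    )
)

# This is the call that fails: getOrCreate() launches the JVM gateway,
# which exits before reporting its port number back to Python.
spark = configure_spark_with_delta_pip(builder).getOrCreate()
```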

I am on a Windows 11 machine.
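
On Windows, this error usually means PySpark could not start a JVM at all, most often because Java is not installed or JAVA_HOME is unset or points to the wrong directory. A minimal sketch of how one might check, assuming a local JDK install (the path below is a placeholder, not my actual setup):

```python
import os
import shutil
import subprocess

# Is any java executable visible on PATH?
print(shutil.which("java"))

# What JAVA_HOME does the Python process see, if any?
print(os.environ.get("JAVA_HOME"))

# If a JDK is installed but not visible, point JAVA_HOME at its root
# before building the SparkSession (hypothetical path; adjust to your machine).
os.environ["JAVA_HOME"] = r"C:\Program Files\Java\jdk-11"

# Verify the JVM actually starts; Spark 3.3 requires Java 8, 11, or 17.
subprocess.run(["java", "-version"])
```

If shutil.which("java") returns None or java -version fails, the gateway process exits immediately, which matches the traceback above.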
