Skip to content

Commit cdc3221

Browse files
committed
[Chores] Update packagings
Signed-off-by: Austin Liu <[email protected]>
1 parent d5c276a commit cdc3221

File tree

5 files changed

+12
-15
lines changed

5 files changed

+12
-15
lines changed

raysql/__init__.py → datafusion_ray/__init__.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -20,7 +20,7 @@
2020
except ImportError:
2121
import importlib_metadata
2222

23-
from ._raysql_internal import (
23+
from ._datafusion_ray_internal import (
2424
Context,
2525
ExecutionGraph,
2626
QueryStage,

raysql/context.py → datafusion_ray/context.py

+7-7
Original file line number | Diff line number | Diff line change
@@ -23,8 +23,8 @@
2323
import pyarrow as pa
2424
import ray
2525

26-
import raysql
27-
from raysql import Context, ExecutionGraph, QueryStage
26+
import datafusion_ray
27+
from datafusion_ray import Context, ExecutionGraph, QueryStage
2828
from typing import List
2929

3030
def schedule_execution(
@@ -73,7 +73,7 @@ def _get_worker_inputs(
7373
return ids, futures
7474

7575
# schedule the actual execution workers
76-
plan_bytes = raysql.serialize_execution_plan(stage.get_execution_plan())
76+
plan_bytes = datafusion_ray.serialize_execution_plan(stage.get_execution_plan())
7777
futures = []
7878
opt = {}
7979
opt["resources"] = {"worker": 1e-3}
@@ -153,7 +153,7 @@ def _get_worker_inputs(
153153
ray.get([f for _, lst in child_outputs for f in lst])
154154

155155
# schedule the actual execution workers
156-
plan_bytes = raysql.serialize_execution_plan(stage.get_execution_plan())
156+
plan_bytes = datafusion_ray.serialize_execution_plan(stage.get_execution_plan())
157157
futures = []
158158
opt = {}
159159
opt["resources"] = {"worker": 1e-3}
@@ -179,7 +179,7 @@ def execute_query_partition(
179179
*input_partitions: list[pa.RecordBatch],
180180
) -> Iterable[pa.RecordBatch]:
181181
start_time = time.time()
182-
plan = raysql.deserialize_execution_plan(plan_bytes)
182+
plan = datafusion_ray.deserialize_execution_plan(plan_bytes)
183183
# print(
184184
# "Worker executing plan {} partition #{} with shuffle inputs {}".format(
185185
# plan.display(),
@@ -193,7 +193,7 @@ def execute_query_partition(
193193
# This is delegating to DataFusion for execution, but this would be a good place
194194
# to plug in other execution engines by translating the plan into another engine's plan
195195
# (perhaps via Substrait, once DataFusion supports converting a physical plan to Substrait)
196-
ret = raysql.execute_partition(plan, part, partitions)
196+
ret = datafusion_ray.execute_partition(plan, part, partitions)
197197
duration = time.time() - start_time
198198
event = {
199199
"cat": f"{stage_id}-{part}",
@@ -238,7 +238,7 @@ def sql(self, sql: str) -> pa.RecordBatch:
238238
else:
239239
# serialize the query stages and store in Ray object store
240240
query_stages = [
241-
raysql.serialize_execution_plan(
241+
datafusion_ray.serialize_execution_plan(
242242
graph.get_query_stage(i).get_execution_plan()
243243
)
244244
for i in range(final_stage_id + 1)

raysql/main.py → datafusion_ray/main.py

+3-6
Original file line number | Diff line number | Diff line change
@@ -20,18 +20,15 @@
2020

2121
from pyarrow import csv as pacsv
2222
import ray
23-
from raysql import RaySqlContext
23+
from datafusion_ray import RaySqlContext
2424

2525
NUM_CPUS_PER_WORKER = 8
2626

27-
SF = 10
27+
SF = 1
2828
DATA_DIR = f"/mnt/data0/tpch/sf{SF}-parquet"
2929
SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
3030
QUERIES_DIR = os.path.join(SCRIPT_DIR, f"../sqlbench-h/queries/sf={SF}")
3131
RESULTS_DIR = f"results-sf{SF}"
32-
TRUTH_DIR = (
33-
"/home/ubuntu/raysort/ray-sql/sqlbench-runners/spark/{RESULTS_DIR}/{RESULTS_DIR}"
34-
)
3532

3633

3734
def setup_context(use_ray_shuffle: bool, num_workers: int = 2) -> RaySqlContext:
@@ -104,7 +101,7 @@ def compare(q: int):
104101

105102

106103
def tpch_bench():
107-
ray.init("auto")
104+
ray.init(resources={"worker": 1})
108105
num_workers = int(ray.cluster_resources().get("worker", 1)) * NUM_CPUS_PER_WORKER
109106
use_ray_shuffle = False
110107
ctx = setup_context(use_ray_shuffle, num_workers)
File renamed without changes.

raysql/tests/test_context.py → datafusion_ray/tests/test_context.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,7 @@
1616
# under the License.
1717

1818
import pytest
19-
from raysql import Context
19+
from datafusion_ray import Context
2020

2121
def test():
2222
ctx = Context(1, False)

0 commit comments

Comments
 (0)