File tree 10 files changed +21
-20
lines changed
10 files changed +21
-20
lines changed Original file line number Diff line number Diff line change @@ -46,15 +46,15 @@ import os
46
46
import pandas as pd
47
47
import ray
48
48
49
- from datafusion_ray import RaySqlContext
49
+ from datafusion_ray import DatafusionRayContext
50
50
51
51
SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__ ))
52
52
53
53
# Start a local cluster
54
54
ray.init(resources = {" worker" : 1 })
55
55
56
56
# Create a context and register a table
57
- ctx = RaySqlContext (2 , use_ray_shuffle = True )
57
+ ctx = DatafusionRayContext (2 , use_ray_shuffle = True )
58
58
# Register either a CSV or Parquet file
59
59
# ctx.register_csv("tips", f"{SCRIPT_DIR}/tips.csv", True)
60
60
ctx.register_parquet(" tips" , f " { SCRIPT_DIR } /tips.parquet " )
Original file line number Diff line number Diff line change @@ -32,14 +32,14 @@ fn main() -> Result<(), String> {
32
32
33
33
// We don't include the proto files in releases so that downstreams
34
34
// do not need to have PROTOC included
35
- if Path :: new ( "src/proto/raysql .proto" ) . exists ( ) {
35
+ if Path :: new ( "src/proto/datafusion-ray .proto" ) . exists ( ) {
36
36
println ! ( "cargo:rerun-if-changed=src/proto/datafusion.proto" ) ;
37
- println ! ( "cargo:rerun-if-changed=src/proto/raysql .proto" ) ;
37
+ println ! ( "cargo:rerun-if-changed=src/proto/datafusion-ray .proto" ) ;
38
38
tonic_build:: configure ( )
39
39
. extern_path ( ".datafusion" , "::datafusion_proto::protobuf" )
40
- . compile ( & [ "src/proto/raysql .proto" ] , & [ "src/proto" ] )
40
+ . compile ( & [ "src/proto/datafusion-ray .proto" ] , & [ "src/proto" ] )
41
41
. map_err ( |e| format ! ( "protobuf compilation failed: {e}" ) ) ?;
42
- let generated_source_path = out. join ( "raysql .protobuf.rs" ) ;
42
+ let generated_source_path = out. join ( "datafusion-ray .protobuf.rs" ) ;
43
43
let code = std:: fs:: read_to_string ( generated_source_path) . unwrap ( ) ;
44
44
let mut file = std:: fs:: OpenOptions :: new ( )
45
45
. write ( true )
Original file line number Diff line number Diff line change 28
28
serialize_execution_plan ,
29
29
deserialize_execution_plan ,
30
30
)
31
- from .context import RaySqlContext
31
+ from .context import DatafusionRayContext
32
32
33
33
__version__ = importlib_metadata .version (__name__ )
Original file line number Diff line number Diff line change 27
27
from datafusion_ray import Context , ExecutionGraph , QueryStage
28
28
from typing import List
29
29
30
+
30
31
def schedule_execution (
31
32
graph : ExecutionGraph ,
32
33
stage_id : int ,
@@ -208,7 +209,7 @@ def execute_query_partition(
208
209
return ret [0 ] if len (ret ) == 1 else ret
209
210
210
211
211
- class RaySqlContext :
212
+ class DatafusionRayContext :
212
213
def __init__ (self , num_workers : int = 1 , use_ray_shuffle : bool = False ):
213
214
self .ctx = Context (num_workers , use_ray_shuffle )
214
215
self .num_workers = num_workers
Original file line number Diff line number Diff line change 20
20
21
21
from pyarrow import csv as pacsv
22
22
import ray
23
- from datafusion_ray import RaySqlContext
23
+ from datafusion_ray import DatafusionRayContext
24
24
25
25
NUM_CPUS_PER_WORKER = 8
26
26
31
31
RESULTS_DIR = f"results-sf{ SF } "
32
32
33
33
34
- def setup_context (use_ray_shuffle : bool , num_workers : int = 2 ) -> RaySqlContext :
34
+ def setup_context (use_ray_shuffle : bool , num_workers : int = 2 ) -> DatafusionRayContext :
35
35
print (f"Using { num_workers } workers" )
36
- ctx = RaySqlContext (num_workers , use_ray_shuffle )
36
+ ctx = DatafusionRayContext (num_workers , use_ray_shuffle )
37
37
for table in [
38
38
"customer" ,
39
39
"lineitem" ,
@@ -53,14 +53,14 @@ def load_query(n: int) -> str:
53
53
return fin .read ()
54
54
55
55
56
- def tpch_query (ctx : RaySqlContext , q : int = 1 ):
56
+ def tpch_query (ctx : DatafusionRayContext , q : int = 1 ):
57
57
sql = load_query (q )
58
58
result_set = ctx .sql (sql )
59
59
return result_set
60
60
61
61
62
62
def tpch_timing (
63
- ctx : RaySqlContext ,
63
+ ctx : DatafusionRayContext ,
64
64
q : int = 1 ,
65
65
print_result : bool = False ,
66
66
write_result : bool = False ,
Original file line number Diff line number Diff line change 19
19
import pandas as pd
20
20
import ray
21
21
22
- from raysql import RaySqlContext
22
+ from datafusion_ray import DatafusionRayContext
23
23
24
24
SCRIPT_DIR = os .path .dirname (os .path .realpath (__file__ ))
25
25
26
26
# Start a local cluster
27
27
ray .init (resources = {"worker" : 1 })
28
28
29
29
# Create a context and register a table
30
- ctx = RaySqlContext (2 , use_ray_shuffle = True )
30
+ ctx = DatafusionRayContext (2 , use_ray_shuffle = True )
31
31
# Register either a CSV or Parquet file
32
32
# ctx.register_csv("tips", f"{SCRIPT_DIR}/tips.csv", True)
33
33
ctx .register_parquet ("tips" , f"{ SCRIPT_DIR } /tips.parquet" )
Original file line number Diff line number Diff line change @@ -44,7 +44,7 @@ use tokio::task::JoinHandle;
44
44
45
45
type PyResultSet = Vec < PyObject > ;
46
46
47
- #[ pyclass( name = "Context" , module = "raysql " , subclass) ]
47
+ #[ pyclass( name = "Context" , module = "datafusion_ray " , subclass) ]
48
48
pub struct PyContext {
49
49
pub ( crate ) ctx : SessionContext ,
50
50
use_ray_shuffle : bool ,
Original file line number Diff line number Diff line change @@ -31,7 +31,7 @@ pub mod utils;
31
31
32
32
/// A Python module implemented in Rust.
33
33
#[ pymodule]
34
- fn _datafusion_ray_internal ( _py : Python , m : & PyModule ) -> PyResult < ( ) > {
34
+ fn _datafusion_ray_internal ( m : & Bound < ' _ , PyModule > ) -> PyResult < ( ) > {
35
35
// register classes that can be created directly from Python code
36
36
m. add_class :: < context:: PyContext > ( ) ?;
37
37
m. add_class :: < planner:: PyExecutionGraph > ( ) ?;
Original file line number Diff line number Diff line change @@ -32,7 +32,7 @@ use std::sync::atomic::{AtomicUsize, Ordering};
32
32
use std:: sync:: Arc ;
33
33
use uuid:: Uuid ;
34
34
35
- #[ pyclass( name = "ExecutionGraph" , module = "raysql " , subclass) ]
35
+ #[ pyclass( name = "ExecutionGraph" , module = "datafusion_ray " , subclass) ]
36
36
pub struct PyExecutionGraph {
37
37
pub graph : ExecutionGraph ,
38
38
}
Original file line number Diff line number Diff line change 1
1
syntax = "proto3" ;
2
2
3
- package raysql .protobuf ;
3
+ package datafusion_ray .protobuf ;
4
4
5
5
option java_multiple_files = true ;
6
- option java_package = "raysql .protobuf" ;
6
+ option java_package = "datafusion_ray .protobuf" ;
7
7
option java_outer_classname = "RaySqlProto" ;
8
8
9
9
import "datafusion.proto" ;
You can’t perform that action at this time.
0 commit comments