1515# limitations under the License.
1616#
1717
18- from typing import Dict , Set , List , cast
18+ from typing import Dict , Set , List , Optional , cast
1919
2020from pywy .core .core import Plugin , PywyPlan
2121from pywy .operators .base import PO_T
2222from pywy .types import (GenericTco , Predicate , Function , BiFunction , FlatmapFunction , IterableOut , T , In , Out )
2323from pywy .operators import *
24- from pywy .basic .model . ops import Op
24+ from pywy .basic .data . record import Record
2525from pywy .basic .model .option import Option
2626from pywy .basic .model .models import Model
2727
@@ -50,7 +50,6 @@ def __init__(self, configuration: Configuration = Configuration()):
5050 """
5151 add a :class:`Plugin` to the :class:`Context`
5252 """
53-
5453 def register (self , * plugins : Plugin ):
5554 for p in plugins :
5655 self .plugins .update (p )
@@ -59,15 +58,19 @@ def register(self, *plugins: Plugin):
5958 """
6059 remove a :class:`Plugin` from the :class:`Context`
6160 """
62-
6361 def unregister (self , * plugins : Plugin ):
6462 for p in plugins :
6563 self .plugins .remove (p )
6664 return self
6765
68- def textfile (self , file_path : str ) -> ' DataQuanta[str]' :
66+ def textfile (self , file_path : str ) -> " DataQuanta[str]" :
6967 return DataQuanta (self , TextFileSource (file_path ))
7068
69+ def parquet (
70+ self , file_path : str , projection : Optional [List [str ]] = None , column_names : Optional [List [str ]] = None
71+ ) -> "DataQuanta[Record]" :
72+ return DataQuanta (self , ParquetSource (file_path , projection , column_names ))
73+
7174 def __str__ (self ):
7275 return "Plugins: {}" .format (str (self .plugins ))
7376
@@ -88,25 +91,31 @@ def __init__(self, context: WayangContext, operator: PywyOperator):
8891 def filter (self : "DataQuanta[T]" , p : Predicate , input_type : GenericTco = None ) -> "DataQuanta[T]" :
8992 return DataQuanta (self .context , self ._connect (FilterOperator (p , input_type )))
9093
91- def map (self : "DataQuanta[In]" , f : Function , input_type : GenericTco = None , output_type : GenericTco = None ) -> "DataQuanta[Out]" :
94+ def map (
95+ self : "DataQuanta[In]" ,
96+ f : Function ,
97+ input_type : GenericTco = None ,
98+ output_type : GenericTco = None
99+ ) -> "DataQuanta[Out]" :
92100 return DataQuanta (self .context , self ._connect (MapOperator (f , input_type , output_type )))
93101
94- def flatmap (self : "DataQuanta[In]" , f : FlatmapFunction , input_type : GenericTco = None , output_type : GenericTco = None ) -> "DataQuanta[IterableOut]" :
102+ def flatmap (
103+ self : "DataQuanta[In]" ,
104+ f : FlatmapFunction ,
105+ input_type : GenericTco = None ,
106+ output_type : GenericTco = None
107+ ) -> "DataQuanta[IterableOut]" :
95108 return DataQuanta (self .context , self ._connect (FlatmapOperator (f , input_type , output_type )))
96109
97- def reduce_by_key (self : "DataQuanta[In]" ,
98- key_f : Function ,
99- f : BiFunction ,
100- input_type : GenericTco = None
101- ) -> "DataQuanta[IterableOut]" :
102-
110+ def reduce_by_key (
111+ self : "DataQuanta[In]" ,
112+ key_f : Function ,
113+ f : BiFunction ,
114+ input_type : GenericTco = None
115+ ) -> "DataQuanta[IterableOut]" :
103116 return DataQuanta (self .context , self ._connect (ReduceByKeyOperator (key_f , f , input_type )))
104117
105- def sort (self : "DataQuanta[In]" ,
106- key_f : Function ,
107- input_type : GenericTco = None
108- ) -> "DataQuanta[IterableOut]" :
109-
118+ def sort (self : "DataQuanta[In]" , key_f : Function , input_type : GenericTco = None ) -> "DataQuanta[IterableOut]" :
110119 return DataQuanta (self .context , self ._connect (SortOperator (key_f , input_type )))
111120
112121 def join (
@@ -115,21 +124,34 @@ def join(
115124 that : "DataQuanta[In]" ,
116125 that_key_f : Function ,
117126 input_type : GenericTco = None ,
118- output_type : GenericTco = None
119- ) -> "DataQuanta[Out]" :
120-
127+ ) -> "DataQuanta[Out]" :
121128 op = JoinOperator (
122129 this_key_f ,
123130 that ,
124131 that_key_f ,
125- input_type ,
126- output_type
132+ input_type
127133 )
128134
129135 self ._connect (op ),
130136 return DataQuanta (
131137 self .context ,
132- that ._connect (op ,1 )
138+ that ._connect (op , 1 )
139+ )
140+
141+ def cartesian (
142+ self : "DataQuanta[In]" ,
143+ that : "DataQuanta[In]" ,
144+ input_type : GenericTco = None ,
145+ ) -> "DataQuanta[Out]" :
146+ op = CartesianOperator (
147+ that ,
148+ input_type
149+ )
150+
151+ self ._connect (op ),
152+ return DataQuanta (
153+ self .context ,
154+ that ._connect (op , 1 )
133155 )
134156
135157 def dlTraining (
@@ -140,7 +162,6 @@ def dlTraining(
140162 input_type : GenericTco ,
141163 output_type : GenericTco
142164 ) -> "DataQuanta[Out]" :
143-
144165 op = DLTrainingOperator (
145166 model ,
146167 option ,
@@ -151,7 +172,7 @@ def dlTraining(
151172
152173 return DataQuanta (
153174 self .context ,
154- that ._connect (op ,1 )
175+ that ._connect (op , 1 )
155176 )
156177
157178 def predict (
@@ -169,10 +190,10 @@ def predict(
169190
170191 return DataQuanta (
171192 self .context ,
172- that ._connect (op ,1 )
193+ that ._connect (op , 1 )
173194 )
174195
175- def store_textfile (self : "DataQuanta[In]" , path : str , input_type : GenericTco = None ):
196+ def store_textfile (self : "DataQuanta[In]" , path : str , input_type : GenericTco = None ) -> None :
176197 last : List [SinkOperator ] = [
177198 cast (
178199 SinkOperator ,
@@ -184,7 +205,6 @@ def store_textfile(self: "DataQuanta[In]", path: str, input_type: GenericTco = N
184205 )
185206 )
186207 ]
187- #print(PywyPlan(self.context.plugins, last))
188208 PywyPlan (self .context .plugins , self .context .configuration .entries , last ).execute ()
189209
190210 def _connect (self , op : PO_T , port_op : int = 0 ) -> PywyOperator :
0 commit comments