@@ -53,7 +53,7 @@ def __del__(self):
53
53
if not self .__in_memory :
54
54
delete_file (self .__filename )
55
55
56
- def from_fireducks (self , df : fd .DataFrame ):
56
+ def from_fireducks (self , df : fd .DataFrame ) -> "DataFlow.DataFrame" :
57
57
if self .__in_memory :
58
58
self .__data = df
59
59
else :
@@ -66,7 +66,7 @@ def to_fireducks(self) -> fd.DataFrame:
66
66
else :
67
67
return to_fireducks_from_file (tmp_filename = self .__filename , file_type = self .__file_type )
68
68
69
- def from_pandas (self , df : pd .DataFrame ):
69
+ def from_pandas (self , df : pd .DataFrame ) -> "DataFlow.DataFrame" :
70
70
if self .__in_memory :
71
71
self .__data = fd .from_pandas (df )
72
72
else :
@@ -79,7 +79,7 @@ def to_pandas(self) -> pd.DataFrame:
79
79
else :
80
80
return to_fireducks_from_file (tmp_filename = self .__filename , file_type = self .__file_type ).to_pandas ()
81
81
82
- def from_polars (self , df : pl .DataFrame ):
82
+ def from_polars (self , df : pl .DataFrame ) -> "DataFlow.DataFrame" :
83
83
if self .__in_memory :
84
84
self .__data = fd .from_pandas (df .to_pandas ())
85
85
else :
@@ -94,91 +94,102 @@ def to_polars(self) -> pl.DataFrame:
94
94
to_fireducks_from_file (tmp_filename = self .__filename , file_type = self .__file_type ).to_pandas ()
95
95
)
96
96
97
def from_csv(self, filename: str) -> "DataFlow.DataFrame":
    """
    load data from a CSV file

    :param filename: str - path of the CSV file to read
    :return: this same object (self)
    """
    if not self.__in_memory:
        # file-backed mode: hand the source path plus the backing file name
        # and file type to the helper
        from_csv_2_file(
            filename=filename,
            tmp_filename=self.__filename,
            file_type=self.__file_type,
        )
        return self

    # in-memory mode: keep the frame read by fireducks
    self.__data = fd.read_csv(filename)
    return self
103
103
104
def to_csv(self, filename: str, index=False) -> "DataFlow.DataFrame":
    """
    write data to a CSV file

    :param filename: str - path of the CSV file to write
    :param index: passed as ``index`` to the in-memory ``to_csv`` call;
        NOTE(review): the file-backed call does not receive it - confirm
        this is intended
    :return: this same object (self)
    """
    if self.__in_memory:
        self.__data.to_csv(filename, index=index)
        return self

    # file-backed mode: the helper gets the target path plus the backing
    # file name and file type
    to_csv_from_file(
        filename=filename,
        tmp_filename=self.__filename,
        file_type=self.__file_type,
    )
    return self
110
110
111
def from_feather(self, filename: str) -> "DataFlow.DataFrame":
    """
    load data from a feather file

    :param filename: str - path of the feather file to read
    :return: this same object (self)
    """
    if not self.__in_memory:
        # file-backed mode: delegate to the helper with the backing file
        # name and file type
        from_feather_2_file(
            filename=filename,
            tmp_filename=self.__filename,
            file_type=self.__file_type,
        )
        return self

    # in-memory mode: read with feather.read_feather, then convert the result
    # with fd.from_pandas
    loaded = feather.read_feather(filename)
    self.__data = fd.from_pandas(loaded)
    return self
117
117
118
def to_feather(self, filename: str) -> "DataFlow.DataFrame":
    """
    write data to a feather file

    :param filename: str - path of the feather file to write
    :return: this same object (self)
    """
    if self.__in_memory:
        self.__data.to_feather(filename)
        return self

    # file-backed mode: the helper gets the target path plus the backing
    # file name and file type
    to_feather_from_file(
        filename=filename,
        tmp_filename=self.__filename,
        file_type=self.__file_type,
    )
    return self
124
124
125
def from_parquet(self, filename: str) -> "DataFlow.DataFrame":
    """
    load data from a parquet file

    :param filename: str - path of the parquet file to read
    :return: this same object (self)
    """
    if not self.__in_memory:
        # file-backed mode: delegate to the helper with the backing file
        # name and file type
        from_parquet_2_file(
            filename=filename,
            tmp_filename=self.__filename,
            file_type=self.__file_type,
        )
        return self

    # in-memory mode: keep the frame read by fireducks
    self.__data = fd.read_parquet(filename)
    return self
131
131
132
def to_parquet(self, filename: str) -> "DataFlow.DataFrame":
    """
    write data to a parquet file

    :param filename: str - path of the parquet file to write
    :return: this same object (self)
    """
    if self.__in_memory:
        self.__data.to_parquet(filename)
        return self

    # file-backed mode: the helper gets the target path plus the backing
    # file name and file type
    to_parquet_from_file(
        filename=filename,
        tmp_filename=self.__filename,
        file_type=self.__file_type,
    )
    return self
138
138
139
def from_json(self, filename: str) -> "DataFlow.DataFrame":
    """
    load data from a JSON file

    :param filename: str - path of the JSON file to read
    :return: this same object (self)
    """
    if not self.__in_memory:
        # file-backed mode: delegate to the helper with the backing file
        # name and file type
        from_json_2_file(
            filename=filename,
            tmp_filename=self.__filename,
            file_type=self.__file_type,
        )
        return self

    # in-memory mode: keep the frame read by fireducks
    self.__data = fd.read_json(filename)
    return self
145
145
146
def to_json(self, filename: str) -> "DataFlow.DataFrame":
    """
    write data to a JSON file

    :param filename: str - path of the JSON file to write
    :return: this same object (self)
    """
    if self.__in_memory:
        self.__data.to_json(filename)
        return self

    # file-backed mode: the helper gets the target path plus the backing
    # file name and file type
    to_json_from_file(
        filename=filename,
        tmp_filename=self.__filename,
        file_type=self.__file_type,
    )
    return self
152
152
153
def from_hdf(self, filename: str) -> "DataFlow.DataFrame":
    """
    load data from an HDF file

    :param filename: str - path of the HDF file to read
    :return: this same object (self)
    """
    if not self.__in_memory:
        # file-backed mode: delegate to the helper with the backing file
        # name and file type
        from_hdf_2_file(
            filename=filename,
            tmp_filename=self.__filename,
            file_type=self.__file_type,
        )
        return self

    # in-memory mode: keep the frame read by fireducks
    self.__data = fd.read_hdf(filename)
    return self
159
159
160
def to_hdf(self, filename: str, key: str = "key") -> "DataFlow.DataFrame":
    """
    write data to an HDF file

    :param filename: str - path of the HDF file to write
    :param key: str - passed as ``key`` to the write call in both modes
    :return: this same object (self)
    """
    if self.__in_memory:
        self.__data.to_hdf(path_or_buf=filename, key=key)
        return self

    # file-backed mode: the helper gets the target path, the backing file
    # name and file type, and the same key
    to_hdf_from_file(
        filename=filename,
        tmp_filename=self.__filename,
        file_type=self.__file_type,
        key=key,
    )
    return self
166
166
167
167
def columns(self) -> list:
    """
    list the columns of the data frame

    :return: list - column names, taken from the in-memory frame or obtained
        from the backing file via data_get_columns
    """
    if not self.__in_memory:
        return data_get_columns(
            tmp_filename=self.__filename,
            file_type=self.__file_type,
        )

    column_index = self.__data.columns
    return column_index.to_list()
172
177
173
def columns_delete(self, columns: list) -> "DataFlow.DataFrame":
    """
    delete columns from the data frame

    :param columns: list - names of the columns to remove
    :return: this same object (self)
    """
    if not self.__in_memory:
        # file-backed mode: delegate to the helper with the backing file
        # name and file type
        data_delete_columns(
            tmp_filename=self.__filename,
            file_type=self.__file_type,
            columns=columns,
        )
        return self

    # in-memory mode: drop is applied in place on the held frame
    self.__data.drop(columns=columns, inplace=True)
    return self
180
185
181
- def columns_rename (self , columns_mapping : dict ):
186
+ def columns_rename (self , columns_mapping : dict ) -> "DataFlow.DataFrame" :
187
+ """
188
+ rename columns
189
+
190
+ :param columns_mapping: dict - old_name: new_name pairs ex. {"Year": "year", "Units": "units"}
191
+ :return:
192
+ """
182
193
if self .__in_memory :
183
194
self .__data .rename (columns = columns_mapping , inplace = True )
184
195
else :
@@ -189,13 +200,19 @@ def columns_rename(self, columns_mapping: dict):
189
200
)
190
201
return self
191
202
192
def columns_select(self, columns: list) -> "DataFlow.DataFrame":
    """
    columns select - columns to keep in data frame

    :param columns: list - names of the columns to keep
    :return: this same object (self)
    """
    if not self.__in_memory:
        # file-backed mode: delegate to the helper with the backing file
        # name and file type
        data_select_columns(
            tmp_filename=self.__filename,
            file_type=self.__file_type,
            columns=columns,
        )
        return self

    # in-memory mode: rebind the held frame to the selection
    selected = self.__data[columns]
    self.__data = selected
    return self
197
214
198
- def filter_on_column (self , column : str , value : Any , operator : Operator ):
215
+ def filter_on_column (self , column : str , value : Any , operator : Operator ) -> "DataFlow.DataFrame" :
199
216
if self .__in_memory :
200
217
match operator :
201
218
case Operator .Eq :
@@ -218,3 +235,4 @@ def filter_on_column(self, column: str, value: Any, operator: Operator):
218
235
value = value ,
219
236
operator = operator ,
220
237
)
238
+ return self
0 commit comments