 import pandas as pd
+import pyarrow as pa
 import pytz
-from datetime import datetime, timedelta
+from datetime import datetime, timedelta, date, timezone
 from pandas.compat import set_function_name
 from typing import Optional
@@ -40,7 +41,8 @@ def _apply_updates(
         if throttle > timedelta(0):
             csp.schedule_alarm(alarm, throttle, True)
         s_has_time_col = time_col and time_col not in data.keys()
-        s_datetime_cols = set([c for c, t in table.schema().items() if t == datetime])
+        s_datetime_cols = set([c for c, t in table.schema().items() if t == "datetime"])
+        s_date_cols = set([c for c, t in table.schema().items() if t == "date"])
 
     with csp.stop():
         try:
@@ -81,14 +83,23 @@ def _apply_updates(
                 row[index_col] = idx
                 if s_has_time_col:
                     if localize:
-                        row[time_col] = pytz.utc.localize(csp.now())
+                        row[time_col] = int(pytz.utc.localize(csp.now()).timestamp() * 1000)
                     else:
-                        row[time_col] = csp.now()
+                        row[time_col] = int(pytz.utc.localize(csp.now()).timestamp() * 1000)
             else:
                 row = new_rows[idx]
 
-            if localize and col in s_datetime_cols and value.tzinfo is None:
-                row[col] = pytz.utc.localize(value)
+            if col in s_date_cols:
+                row[col] = int(datetime(year=value.year,
+                                        month=value.month,
+                                        day=value.day,
+                                        tzinfo=timezone.utc).timestamp() * 1000)
+
+            elif localize and col in s_datetime_cols:
+                if value.tzinfo is None:
+                    row[col] = int(pytz.utc.localize(value).timestamp() * 1000)
+                else:
+                    row[col] = int(value.timestamp() * 1000)  # already tz-aware; localizing again would raise
             else:
                 row[col] = value
 
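The hunk above feeds Perspective integer epoch milliseconds for "datetime" and "date" columns, treating naive timestamps as UTC. A minimal standalone sketch of that conversion, with made-up values (not part of the module):

from datetime import date, datetime, timezone
import pytz

dt = datetime(2024, 1, 2, 3, 4, 5)                    # naive, interpreted as UTC
dt_ms = int(pytz.utc.localize(dt).timestamp() * 1000)  # 1704164645000
d = date(2024, 1, 2)
d_ms = int(datetime(d.year, d.month, d.day, tzinfo=timezone.utc).timestamp() * 1000)  # midnight UTC, in ms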
@@ -160,28 +171,41 @@ def __init__(
         self._limit = limit
         self._localize = localize
 
+        # TODO: we do not want one server per table; make the Client an optional argument
+        self._psp_server = perspective.Server()
+        self._psp_client = self._psp_server.new_local_client()
+
         self._basket = _frame_to_basket(data)
         self._static_frame = data.csp.static_frame()
-        self._static_table = perspective.Table(self._static_frame)
+        self._static_table = self._psp_client.table(self._static_frame)
         static_schema = self._static_table.schema()
         # Since the index will be accounted for separately, remove the index from the static table schema,
         # and re-enter it under index_col
         raw_index_name = self._static_frame.index.name or "index"
         index_type = static_schema.pop(raw_index_name)
         schema = {index_col: index_type}
+        perspective_type_map = {
+            str: "string",
+            float: "float",
+            int: "integer",
+            date: "date",
+            datetime: "datetime",
+            bool: "boolean",
+        }
+
         if time_col:
-            schema[time_col] = datetime
+            schema[time_col] = "datetime"
         for col, series in data.items():
             if is_csp_type(series):
-                schema[col] = series.dtype.subtype
+                schema[col] = perspective_type_map[series.dtype.subtype]
             else:
                 schema[col] = static_schema[col]
 
         if self._keep_history:
-            self._table = perspective.Table(schema, index=None, limit=limit)
+            self._table = self._psp_client.table(schema, index=None, limit=limit)
             self._static_records = self._static_frame.to_dict(orient="index")
         else:
-            self._table = perspective.Table(schema, index=self._index_col)
+            self._table = self._psp_client.table(schema, index=self._index_col)
             self._static_frame.index = self._static_frame.index.rename(self._index_col)
             self._table.update(self._static_frame)
             self._static_records = None  # No need to update dynamically
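Table construction now goes through a client obtained from an in-process `perspective.Server()` rather than the removed `perspective.Table(...)` constructor. A minimal sketch of that pattern, assuming perspective-python 3.x and made-up column names:

import perspective

server = perspective.Server()
client = server.new_local_client()

# Schemas are plain dicts of string type names ("integer", "float", "string",
# "boolean", "date", "datetime") rather than Python types.
tbl = client.table({"price": "float", "time": "datetime"}, index=None, limit=1000)
tbl.update([{"price": 1.5, "time": 1704164645000}])  # datetimes as epoch milliseconds

# A table can also be built directly from a pandas DataFrame, as the static
# table above is.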
@@ -222,7 +246,7 @@ def run_historical(self, starttime, endtime):
         index = self._index_col
         if self._limit:
             df = df.sort_values(self._time_col).tail(self._limit).reset_index(drop=True)
-        return perspective.Table(df.to_dict("series"), index=index)
+        return self._psp_client.table(df, index=index)
 
     def run(self, starttime=None, endtime=timedelta(seconds=60), realtime=True, clear=False):
         """Run a graph that sends data to the table on the current thread.
@@ -280,7 +304,7 @@ def get_widget(self, **override_kwargs):
                 "sort": [[self._time_col, "desc"]],
             }
         else:
-            kwargs = {"columns": list(self._table.schema())}
+            kwargs = {"columns": list(self._table.columns())}
         kwargs.update(override_kwargs)
         return perspective.PerspectiveWidget(self._table, **kwargs)
 
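The widget's "columns" option expects a list of column names; `Table.columns()` returns exactly that, whereas `schema()` returns a name-to-type mapping. A small sketch continuing the hypothetical `tbl` from the sketch above:

schema = tbl.schema()    # {"price": "float", "time": "datetime"}
names = tbl.columns()    # ["price", "time"]
widget = perspective.PerspectiveWidget(tbl, columns=names)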
@@ -294,14 +318,33 @@ def _method(self, **options):
 
     @classmethod
     def _add_view_methods(cls):
-        cls.to_df = cls._create_view_method(perspective.View.to_df)
-        cls.to_dict = cls._create_view_method(perspective.View.to_dict)
+        # cls.to_df = cls._create_view_method(perspective.View.to_df)
+        # cls.to_dict = cls._create_view_method(perspective.View.to_dict)
         cls.to_json = cls._create_view_method(perspective.View.to_json)
         cls.to_csv = cls._create_view_method(perspective.View.to_csv)
-        cls.to_numpy = cls._create_view_method(perspective.View.to_numpy)
+        # cls.to_numpy = cls._create_view_method(perspective.View.to_numpy)
         cls.to_columns = cls._create_view_method(perspective.View.to_columns)
         cls.to_arrow = cls._create_view_method(perspective.View.to_arrow)
 
+    def to_df(self, **kwargs):
+        ipc_bytes = self.to_arrow()
+        table = pa.ipc.open_stream(ipc_bytes).read_all()
+        df = pd.DataFrame(table.to_pandas(**kwargs))
+
+        # DAVIS: `pyarrow` does not force alphabetical order on categories, so
+        # we correct this here to make assertions pass. We can enforce this in
+        # Perspective at a performance hit/API complexity.
+        for column in df:
+            if df[column].dtype == "datetime64[ms]":
+                df[column] = df[column].astype('datetime64[ns]')
+            elif df[column].dtype == "category":
+                df[column] = df[column].cat.reorder_categories(df[column].cat.categories.sort_values())
+
+        if df.index.dtype == "category":
+            df.index = df.index.cat.reorder_categories(df.index.cat.categories.sort_values())
+
+        return df
+
 
 CspPerspectiveTable._add_view_methods()
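The replacement `to_df` decodes the view's Arrow IPC stream with `pyarrow` instead of calling the removed `perspective.View.to_df`. A small standalone sketch of that round trip, using a hypothetical table rather than one fed by a csp graph:

import perspective
import pyarrow as pa

client = perspective.Server().new_local_client()
tbl = client.table({"price": "float", "time": "datetime"})
tbl.update([{"price": 1.5, "time": 1704164645000}])

ipc_bytes = tbl.view().to_arrow()                           # Arrow IPC stream bytes
df = pa.ipc.open_stream(ipc_bytes).read_all().to_pandas()   # datetime columns typically arrive as datetime64[ms]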