from copy import deepcopy
from itertools import product
from collections import OrderedDict
+import datetime

+import numpy as np
import pandas as pd
from mindsdb_forecast_visualizer.core.plotter import plot

-from lightwood.data.cleaner import _standardize_datetime
+from dataprep_ml.cleaners import _standardize_datetime



def forecast(model,
@@ -33,7 +35,6 @@ def forecast(model,

    if show_insample and len(backfill) == 0:
        raise Exception("You must pass a dataframe with the predictor's training data to show in-sample forecasts.")
-    predargs['time_format'] = 'infer'

    # instantiate series according to groups
    group_values = OrderedDict()
@@ -58,14 +59,20 @@ def forecast(model,
        if g == ():
            g = '__default'
        try:
-            filtered_backfill, test_data = get_group(g, subset, data, backfill, group_keys, order)
+            filtered_backfill, filtered_data = get_group(g, subset, data, backfill, group_keys, order)

-            if test_data.shape[0] > 0:
+            if filtered_data.shape[0] > 0:
                print(f'Plotting for group {g}...')
-                original_test_data = test_data
-                test_data = test_data.iloc[[0]]  # library only supports plotting first horizon inside test dataset

-                filtered_data = pd.concat([filtered_backfill.iloc[-warm_start_offset:], test_data])
+                # check offset for warm start
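+                # only the first horizon of the test set is plotted, so mixers that
+                # forecast from a single anchor row get just the first out-of-sample row
+                # appended after the warm-start slice; other mixers get the full frame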
+                special_mixers = ['GluonTSMixer', 'NHitsMixer']
+                if hasattr(model.ensemble, 'indexes_by_accuracy') and \
+                        (model.mixers[model.ensemble.indexes_by_accuracy[0]].__class__.__name__ in special_mixers):
+                    filtered_data = pd.concat([filtered_backfill.iloc[-warm_start_offset:], filtered_data.iloc[[0]]])
+                else:
+                    filtered_data = pd.concat([filtered_backfill.iloc[-warm_start_offset:], filtered_data])
+
+
                if not tss.allow_incomplete_history:
                    assert filtered_data.shape[0] > tss.window

@@ -83,24 +90,26 @@ def forecast(model,

                # forecast & divide into in-sample and out-sample predictions, if required
                if show_insample:
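+                    # stash the caller's forecast_offset so it can be restored after
+                    # the in-sample predict below overrides it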
+                    offset = predargs.get('forecast_offset', 0)
                    predargs['forecast_offset'] = -len(filtered_backfill)
                    model_fit = model.predict(filtered_backfill, args=predargs)
+                    predargs['forecast_offset'] = offset
                else:
                    model_fit = None
                if len(filtered_backfill) > 0:
-                    time_target += [t for t in filtered_backfill[tss.order_by]]
                    pred_target += [None for _ in range(len(filtered_backfill))]
                    conf_lower += [None for _ in range(len(filtered_backfill))]
                    conf_upper += [None for _ in range(len(filtered_backfill))]
                    anomalies += [None for _ in range(len(filtered_backfill))]

                predargs['forecast_offset'] = -warm_start_offset
                model_forecast = model.predict(filtered_data, args=predargs).iloc[warm_start_offset:]
-                real_target += [r for r in original_test_data[target]][:tss.horizon]
+                filtered_data = filtered_data.iloc[warm_start_offset:]
+                real_target += [float(r) for r in filtered_data[target]][:tss.horizon]

-                # edge case: convert one-step-ahead predictions to unitary lists
+                # convert one-step-ahead predictions to unitary lists
                if not isinstance(model_forecast['prediction'].iloc[0], list):
-                    for k in ['prediction', 'lower', 'upper'] + [f'order_{i}' for i in tss.order_by]:
+                    for k in ['prediction', 'lower', 'upper'] + [f'order_{tss.order_by}']:
                        model_forecast[k] = model_forecast[k].apply(lambda x: [x])
                        if show_insample:
                            model_fit[k] = model_fit[k].apply(lambda x: [x])
@@ -109,10 +118,11 @@ def forecast(model,
                pred_target += [p[0] for p in model_fit['prediction']]
                conf_lower += [p[0] for p in model_fit['lower']]
                conf_upper += [p[0] for p in model_fit['upper']]
+                time_target += [p[0] for p in model_fit[f'order_{order}']]
                if 'anomaly' in model_fit.columns:
                    anomalies += [p for p in model_fit['anomaly']]

-                # forecast always corresponds to predicted arrays for the first out-of-sample query data point
+                # forecast corresponds to predicted arrays for the first out-of-sample query data point
                fcst = {
                    'prediction': model_forecast['prediction'].iloc[0],
                    'lower': model_forecast['lower'].iloc[0],
@@ -134,10 +144,23 @@ def forecast(model,
                pred_target += [p for p in fcst['prediction']]
                conf_lower += [p for p in fcst['lower']]
                conf_upper += [p for p in fcst['upper']]
-                time_target += [r for r in original_test_data[tss.order_by]][:tss.horizon]
+
+                # fix timestamps
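+                # rebuild the time axis from the query frame, then left-pad it backwards in
+                # time, one sampling interval (delta) per missing step, until it matches the
+                # combined length of the in-sample and out-of-sample prediction arrays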
+                time_target = [pd.to_datetime(p).timestamp() for p in filtered_data[order]]
+                try:
+                    delta = model.ts_analysis['deltas'][g]
+                except KeyError:
+                    delta = model.ts_analysis['deltas'].get(tuple([str(gg) for gg in g]),
+                                                            model.ts_analysis['deltas']['__default'])
+
+                for i in range(len(pred_target) - len(time_target)):
+                    time_target.insert(0, time_target[0] - delta)
+
+
+                time_target = [datetime.datetime.utcfromtimestamp(ts).strftime('%Y-%m-%d %H:%M:%S') for ts in time_target]

                # round confidences
-                conf = model_forecast['confidence'].values.mean()
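+                # confidence now arrives as one list per row, so flatten before averaging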
+                conf = np.array([np.array(l) for l in model_forecast['confidence'].values]).mean()

                # set titles and legends
                if g != ():
@@ -161,6 +184,8 @@ def forecast(model,
                           anomalies=anomalies if show_anomaly else None,
                           separate=separate)
                fig.show()
+            else:
+                print(f"No data for group {g}. Skipping...")

        except Exception:
            print(f"Error in group {g}:")
@@ -173,11 +198,11 @@ def get_group(g, subset, data, backfill, group_keys, order):
    group_dict = {k: v for k, v in zip(group_keys, g)}

    if subset is None or group_dict in subset:
-        filtered_data = deepcopy(data)
-        filtered_backfill = deepcopy(backfill)
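+        # defer the deepcopy: filter first, then copy only the per-group slices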
+        filtered_data = data
+        filtered_backfill = backfill
        for k, v in group_dict.items():
-            filtered_data = filtered_data[filtered_data[k] == v]
-            filtered_backfill = filtered_backfill[filtered_backfill[k] == v]
+            filtered_data = deepcopy(filtered_data[filtered_data[k] == v])
+            filtered_backfill = deepcopy(filtered_backfill[filtered_backfill[k] == v])

        filtered_data = filtered_data.drop_duplicates(subset=order)
        filtered_backfill = filtered_backfill.drop_duplicates(subset=order)
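
A minimal calling sketch for the updated forecast() entry point. Argument names are inferred from the function body above; the full signature is elided by the hunk headers, and the module path and data frames are assumptions:

    # assumed import path for the function shown in this diff
    from mindsdb_forecast_visualizer.core.forecaster import forecast

    # train_df / test_df: frames split on the predictor's order-by column (assumed)
    forecast(model,                 # trained lightwood time series predictor
             data=test_df,          # out-of-sample rows to forecast and plot
             backfill=train_df,     # training data; required when show_insample=True
             subset=None,           # optional list of group dicts to restrict plotting
             show_insample=True,    # also plot predictions over the backfill data
             show_anomaly=False,    # overlay anomaly flags if the predictor emits them
             separate=False)        # assumed: plot each series in its own figure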