5
5
6
6
7
7
def create_multivar_dataframe (
8
- base_df : pd .DataFrame , * additional_dfs : pd .DataFrame
8
+ base_df : pd .DataFrame , * additional_dfs : pd .DataFrame , interval : str = "M"
9
9
) -> pd .DataFrame :
10
10
"""
11
11
Create a multivariate DataFrame by merging a base DataFrame with additional DataFrames based on a date range.
@@ -25,7 +25,7 @@ def create_multivar_dataframe(
25
25
max_date = base_df .index .max ()
26
26
27
27
# Create a date range at the requested frequency (`interval`; the default "M" yields month-end dates)
28
- date_range = pd .date_range (start = min_date , end = max_date , freq = "M" )
28
+ date_range = pd .date_range (start = min_date , end = max_date , freq = interval )
29
29
30
30
# Create a DataFrame with the date range to ensure complete coverage
31
31
complete_date_range_df = pd .DataFrame ({"Date" : date_range })
@@ -37,22 +37,16 @@ def create_multivar_dataframe(
37
37
# Merge additional DataFrames based on the date range
38
38
for additional_df in additional_dfs :
39
39
# Merge with the complete date range DataFrame to ensure all dates are included
40
- merged_df = pd .merge (
41
- complete_date_range_df , additional_df , on = "Date" , how = "left"
42
- )
40
+ merged_df = pd .merge (complete_date_range_df , additional_df , left_index = True , right_index = True , how = "left" )
43
41
# Fill missing values with 0 and convert to integer
44
42
merged_df = merged_df .fillna (0 ).astype (int )
45
43
# Perform the merge with the multivariate DataFrame
46
- multivar_df = pd .merge (
47
- multivar_df , merged_df , left_index = True , right_index = True , how = "left"
48
- )
44
+ multivar_df = pd .merge (multivar_df , merged_df , left_index = True , right_index = True , how = "left" )
49
45
50
46
return multivar_df
51
47
52
48
53
- def create_windowed_dataframe (
54
- base_df : pd .DataFrame , target_column : str , window_size : int = 3
55
- ) -> pd .DataFrame :
49
+ def create_windowed_dataframe (base_df : pd .DataFrame , target_column : str , window_size : int = 3 ) -> pd .DataFrame :
56
50
"""
57
51
Create a windowed DataFrame by shifting values of a specified column.
58
52
@@ -73,9 +67,7 @@ def create_windowed_dataframe(
73
67
# Add windowed columns
74
68
for i in range (window_size ):
75
69
# Add a column holding the target values shifted by i + 1 periods
76
- windowed_df [f"{ target_column } -{ i + 1 } " ] = windowed_df [target_column ].shift (
77
- periods = i + 1
78
- )
70
+ windowed_df [f"{ target_column } -{ i + 1 } " ] = windowed_df [target_column ].shift (periods = i + 1 )
79
71
80
72
# Drop rows with NaN values
81
73
windowed_df = windowed_df .dropna ()
@@ -93,9 +85,7 @@ def get_labelled_windows(x: np.array, horizon: int = 1) -> Tuple[np.array, np.ar
93
85
return x [:, :- horizon ], x [:, - horizon :]
94
86
95
87
96
- def make_windows (
97
- x : np .array , window_size : int = 7 , horizon : int = 1
98
- ) -> Tuple [np .array , np .array ]:
88
+ def make_windows (x : np .array , window_size : int = 7 , horizon : int = 1 ) -> Tuple [np .array , np .array ]:
99
89
"""
100
90
View a NumPy array as sequential windows.
101
91
Turns a 1D array into a 2D array of sequential windows of window_size.
@@ -104,10 +94,7 @@ def make_windows(
104
94
window_step = np .expand_dims (np .arange (window_size + horizon ), axis = 0 )
105
95
106
96
# 2. Create a 2D array of multiple window steps (minus 1 to account for 0 indexing)
107
- window_indexes = (
108
- window_step
109
- + np .expand_dims (np .arange (len (x ) - (window_size + horizon - 1 )), axis = 0 ).T
110
- )
97
+ window_indexes = window_step + np .expand_dims (np .arange (len (x ) - (window_size + horizon - 1 )), axis = 0 ).T
111
98
112
99
# 3. Index on the target array (time series) with 2D array of multiple window steps
113
100
windowed_array = x [window_indexes ]
@@ -118,9 +105,7 @@ def make_windows(
118
105
return windows , labels
119
106
120
107
121
- def make_train_test_splits (
122
- windows : np .array , labels : np .array , test_split : float = 0.1
123
- ):
108
+ def make_train_test_splits (windows : np .array , labels : np .array , test_split : float = 0.1 ):
124
109
"""
125
110
Splits matching pairs of windows and labels into train and test splits.
126
111
"""
@@ -138,8 +123,6 @@ def make_window_splits(values: np.array, size: int = 10, horizon: int = 1):
138
123
"""
139
124
full_windows , full_labels = make_windows (values , window_size = size , horizon = horizon )
140
125
141
- train_windows , test_windows , train_labels , test_labels = make_train_test_splits (
142
- full_windows , full_labels
143
- )
126
+ train_windows , test_windows , train_labels , test_labels = make_train_test_splits (full_windows , full_labels )
144
127
145
128
return train_windows , test_windows , train_labels , test_labels
0 commit comments