Skip to content

Commit 8e61283

Browse files
authored
Merge pull request #23 from squaredev-io/update/create_multivar_dataframe
Add 'interval' parameter to create_multivar_dataframe function
2 parents 4347e0a + b3f2dc9 commit 8e61283

File tree

1 file changed

+10
-27
lines changed

1 file changed

+10
-27
lines changed

Diff for: timepulse/utils/splits.py

+10-27
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,7 @@
55

66

77
def create_multivar_dataframe(
8-
base_df: pd.DataFrame, *additional_dfs: pd.DataFrame
8+
base_df: pd.DataFrame, *additional_dfs: pd.DataFrame, interval: str = "M"
99
) -> pd.DataFrame:
1010
"""
1111
Create a multivariate DataFrame by merging a base DataFrame with additional DataFrames based on a date range.
@@ -25,7 +25,7 @@ def create_multivar_dataframe(
2525
max_date = base_df.index.max()
2626

2727
# Create a date range at the requested interval (the default "M" gives the last day of each month)
28-
date_range = pd.date_range(start=min_date, end=max_date, freq="M")
28+
date_range = pd.date_range(start=min_date, end=max_date, freq=interval)
2929

3030
# Create a DataFrame with the date range to ensure complete coverage
3131
complete_date_range_df = pd.DataFrame({"Date": date_range})
@@ -37,22 +37,16 @@ def create_multivar_dataframe(
3737
# Merge additional DataFrames based on the date range
3838
for additional_df in additional_dfs:
3939
# Merge with the complete date range DataFrame to ensure all dates are included
40-
merged_df = pd.merge(
41-
complete_date_range_df, additional_df, on="Date", how="left"
42-
)
40+
merged_df = pd.merge(complete_date_range_df, additional_df, left_index=True, right_index=True, how="left")
4341
# Fill missing values with 0 and convert to integer
4442
merged_df = merged_df.fillna(0).astype(int)
4543
# Perform the merge with the multivariate DataFrame
46-
multivar_df = pd.merge(
47-
multivar_df, merged_df, left_index=True, right_index=True, how="left"
48-
)
44+
multivar_df = pd.merge(multivar_df, merged_df, left_index=True, right_index=True, how="left")
4945

5046
return multivar_df
5147

5248

53-
def create_windowed_dataframe(
54-
base_df: pd.DataFrame, target_column: str, window_size: int = 3
55-
) -> pd.DataFrame:
49+
def create_windowed_dataframe(base_df: pd.DataFrame, target_column: str, window_size: int = 3) -> pd.DataFrame:
5650
"""
5751
Create a windowed DataFrame by shifting values of a specified column.
5852
@@ -73,9 +67,7 @@ def create_windowed_dataframe(
7367
# Add windowed columns
7468
for i in range(window_size):
7569
# Assign each shifted column directly on the copied DataFrame
76-
windowed_df[f"{target_column}-{i+1}"] = windowed_df[target_column].shift(
77-
periods=i + 1
78-
)
70+
windowed_df[f"{target_column}-{i+1}"] = windowed_df[target_column].shift(periods=i + 1)
7971

8072
# Drop rows with NaN values
8173
windowed_df = windowed_df.dropna()
@@ -93,9 +85,7 @@ def get_labelled_windows(x: np.array, horizon: int = 1) -> Tuple[np.array, np.ar
9385
return x[:, :-horizon], x[:, -horizon:]
9486

9587

96-
def make_windows(
97-
x: np.array, window_size: int = 7, horizon: int = 1
98-
) -> Tuple[np.array, np.array]:
88+
def make_windows(x: np.array, window_size: int = 7, horizon: int = 1) -> Tuple[np.array, np.array]:
9989
"""
10090
View a NumPy array as sequential windows.
10191
Turns a 1D array into a 2D array of sequential windows of window_size.
@@ -104,10 +94,7 @@ def make_windows(
10494
window_step = np.expand_dims(np.arange(window_size + horizon), axis=0)
10595

10696
# 2. Create a 2D array of multiple window steps (minus 1 to account for 0 indexing)
107-
window_indexes = (
108-
window_step
109-
+ np.expand_dims(np.arange(len(x) - (window_size + horizon - 1)), axis=0).T
110-
)
97+
window_indexes = window_step + np.expand_dims(np.arange(len(x) - (window_size + horizon - 1)), axis=0).T
11198

11299
# 3. Index on the target array (time series) with 2D array of multiple window steps
113100
windowed_array = x[window_indexes]
@@ -118,9 +105,7 @@ def make_windows(
118105
return windows, labels
119106

120107

121-
def make_train_test_splits(
122-
windows: np.array, labels: np.array, test_split: float = 0.1
123-
):
108+
def make_train_test_splits(windows: np.array, labels: np.array, test_split: float = 0.1):
124109
"""
125110
Splits matching pairs of windows and labels into train and test splits.
126111
"""
@@ -138,8 +123,6 @@ def make_window_splits(values: np.array, size: int = 10, horizon: int = 1):
138123
"""
139124
full_windows, full_labels = make_windows(values, window_size=size, horizon=horizon)
140125

141-
train_windows, test_windows, train_labels, test_labels = make_train_test_splits(
142-
full_windows, full_labels
143-
)
126+
train_windows, test_windows, train_labels, test_labels = make_train_test_splits(full_windows, full_labels)
144127

145128
return train_windows, test_windows, train_labels, test_labels

0 commit comments

Comments
 (0)