33import zipfile
44from datetime import datetime
55from datetime import time
6+ from datetime import timedelta
7+ from itertools import cycle
68from typing import Tuple
79
810import pandas as pd
@@ -72,6 +74,40 @@ def create_data():
7274 return Dataset .from_pandas (current , data_definition ), Dataset .from_pandas (reference , data_definition )
7375
7476
def snapshot_tags_generator():
    """Yield lists of snapshot tags forever, repeating a fixed nine-step pattern.

    Each yielded item is a list of tag strings (possibly empty). After the
    ninth item the pattern starts over from the first one.
    """
    labels = (
        "production_critical",
        "city_bikes_hourly",
        "tabular_data",
        "regression_batch_model",
        "high_seasonality",
        "numerical_features",
        "categorical_features",
        "no_missing_values",
    )

    # One entry per step of the pattern: positions in ``labels`` to include.
    index_pattern = (
        (0, 1, 2),
        (1,),
        (),
        (2,),
        (3, 4),
        (),
        (4, 5, 6, 7),
        (),
        (),
    )

    # The lists are built once and re-yielded on every lap, so the very same
    # list objects come back each time the pattern repeats.
    rotation = [[labels[k] for k in picks] for picks in index_pattern]

    while True:
        yield from rotation
102+
103+
# Single tag stream shared by the whole module: it is created once at import
# time, and every call to next_snapshot_tags() advances this same generator.
SNAPSHOT_TAGS = snapshot_tags_generator()


def next_snapshot_tags():
    """Advance the module-level ``SNAPSHOT_TAGS`` generator and return its next item."""
    upcoming = next(SNAPSHOT_TAGS)
    return upcoming
109+
110+
75111def create_snapshot (i : int , data : Tuple [Dataset , Dataset ]):
76112 current , reference = data
77113 report = Report (
@@ -84,7 +120,19 @@ def create_snapshot(i: int, data: Tuple[Dataset, Dataset]):
84120
85121 report .set_batch_size ("daily" )
86122
87- snapshot = report .run (current , reference )
123+ new_current = Dataset .from_pandas (
124+ data = current .as_dataframe ()[
125+ datetime (2023 , 1 , 29 ) + timedelta (days = i ) : datetime (2023 , 1 , 29 ) + timedelta (i + 1 )
126+ ],
127+ data_definition = current .data_definition ,
128+ )
129+
130+ snapshot = report .run (
131+ new_current ,
132+ reference ,
133+ timestamp = datetime (2023 , 1 , 29 ) + timedelta (days = i + 1 ),
134+ tags = next_snapshot_tags (),
135+ )
88136
89137 return snapshot
90138
0 commit comments