@@ -71,26 +71,44 @@ def lambda_handler(event, context):
7171 desired_count_index = metadata ["desired_count_index" ]
7272 current_desired_count = DESIRED_COUNTS [desired_count_index ]
7373
74- workload_date = metadata .get ("workload_date" )
74+ # 2. Determine Execution Parameters (Date Check & Index Rotation)
75+ is_first_time_optimization = False
7576
76- # 2. Check Date for First Time Optimization
77+ # Check Workload Date
7778 if workload_date != current_date :
78- Logger .info (f"Workload date changed: { workload_date } -> { current_date } . Running First Time Optimization with Count: { current_desired_count } " )
79- # Use the rotated desired count for the optimization run too
80- sps_df = load_sps .collect_spot_placement_score_first_time (desired_counts = [current_desired_count ])
79+ Logger .info (f"Workload date changed: { workload_date } -> { current_date } . Prepared First Time Optimization." )
80+ is_first_time_optimization = True
81+
82+ # Update Metadata: Date
8183 metadata ["workload_date" ] = current_date
84+
85+ # Force Desired Count to 1 for First Time Optimization execution
86+ # Note: We do NOT reset the index here. We continue rotation seamlessly.
87+ current_execution_desired_count = 1
8288 else :
83- Logger .info (f"Running Regular Collection. Desired Count: { current_desired_count } (Index: { desired_count_index } )" )
84- sps_df = load_sps .collect_spot_placement_score (desired_counts = [current_desired_count ])
89+ current_execution_desired_count = current_desired_count
8590
86- # 3. Update Index for Next Run
91+ # Update Metadata: Next Index (Always rotate to prevent stuck loops)
8792 next_index = (desired_count_index + 1 ) % len (DESIRED_COUNTS )
8893 metadata ["desired_count_index" ] = next_index
94+
95+ # 3. Save Metadata (State Commit BEFORE Execution)
8996 try :
9097 write_metadata (metadata )
9198 except Exception as e :
9299 Logger .error (f"Failed to write metadata: { e } " )
93- raise
100+ # Log but proceed. If write failed, we might retry same index next time,
101+ # but if execution succeeds, at least data is collected.
102+ # If execution also fails, we risk loop, but S3 failure is rare compared to API Timeout.
103+
104+ # 4. Execute Logic
105+ if is_first_time_optimization :
106+ Logger .info (f"Executing First Time Optimization with Count: { current_execution_desired_count } (Forced)" )
107+ sps_df = load_sps .collect_spot_placement_score_first_time (desired_counts = [current_execution_desired_count ])
108+ else :
109+ Logger .info (f"Executing Regular Collection. Desired Count: { current_execution_desired_count } (Index: { desired_count_index } )" )
110+ sps_df = load_sps .collect_spot_placement_score (desired_counts = [current_execution_desired_count ])
111+
94112
95113 else :
96114 # --- Legacy Fallback Logic: S3 Metadata Missing ---
0 commit comments