Skip to content

Commit 148cc01

Browse files
vidhishanairVidhisha Balachandran
and
Vidhisha Balachandran
authored
Vidhisha/ba benchmarks (#88)
Updates to ba calendar: 1. usage reporting 2. constrainedness computing + grouping results by constrainedness bucket --------- Co-authored-by: Vidhisha Balachandran <[email protected]>
1 parent 23e1a87 commit 148cc01

File tree

8 files changed

+345
-28
lines changed

8 files changed

+345
-28
lines changed

eureka_ml_insights/data_utils/__init__.py

+1
Original file line numberDiff line numberDiff line change
@@ -79,4 +79,5 @@
7979
TokenCounterTransform,
8080
MajorityVoteTransform,
8181
NumpyEncoder,
82+
ExtractUsageTransform,
8283
]

eureka_ml_insights/data_utils/ba_calendar_utils.py

+4-3
Original file line numberDiff line numberDiff line change
@@ -28,11 +28,12 @@ def parse_output_answer(response):
2828

2929
if response is None:
3030
return ""
31+
32+
response = response.replace("**", "").replace("\n", "")
3133

32-
# Try to find an answer in the "Final Answer: X" format
33-
match = re.search(r"(?i)(?<=Final Answer: ).*", response)
34+
match = re.findall(r"(?i)(?<=Final Answer: )(\w+)(?=\s|\W|$)", response)
3435

3536
if match:
36-
answer = match.group(0)
37+
answer = match[len(match) - 1]
3738

3839
return answer

eureka_ml_insights/metrics/ba_calendar_metrics.py

+70-5
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import ast
66
import json
77
import re
8+
import math
89
import numpy as np
910
from datetime import datetime, timedelta
1011

@@ -48,11 +49,13 @@ def filter_slots_by_constraints(time_slots, constraints, day):
4849
for slot in time_slots:
4950
start_time, end_time = slot
5051
if constraints['no_meetings_before']:
51-
no_meetings_before = datetime.strptime(f"{constraints['no_meetings_before']}:00", "%H:%M")
52+
nb = int(constraints['no_meetings_before'])
53+
no_meetings_before = datetime.strptime(f"{nb}:00", "%H:%M")
5254
if start_time < no_meetings_before:
5355
continue
5456
if constraints['no_meetings_after']:
55-
no_meetings_after = datetime.strptime(f"{constraints['no_meetings_after']}:00", "%H:%M")
57+
na = int(constraints['no_meetings_after'])
58+
no_meetings_after = datetime.strptime(f"{na}:00", "%H:%M")
5659
if end_time >= no_meetings_after:
5760
continue
5861
if constraints['no_meetings_on_weekends'] and day in ['Saturday', 'Sunday']:
@@ -86,6 +89,8 @@ def run_programmatic_tests(self, instance):
8689
solution = solution.strip('"').strip('`').strip('\n')
8790
if check_time_slot_format(solution):
8891
result['format_programmatic'] = 1
92+
else:
93+
result['format_programmatic'] = 0
8994
result.update(self.check_availability_programmatic(instance, solution))
9095
result.update(self.check_meeting_duration_programmatic(instance, solution))
9196
result.update(self.check_buffer_time_programmatic(instance, solution))
@@ -102,6 +107,7 @@ def run_programmatic_tests(self, instance):
102107
passed_constraints.append(value)
103108
result['all_correct'] = all_correct
104109
result['fraction_passed'] = np.mean(passed_constraints)
110+
result.update(self.compute_constrainedness_programmatic(instance))
105111
return result
106112

107113
def is_formatted(self, solution):
@@ -224,12 +230,14 @@ def check_time_restrictions_programmatic(self, instance, solution):
224230
no_meetings_after = instance['constraints'].get('no_meetings_after')
225231

226232
if no_meetings_before:
227-
no_meetings_before = datetime.strptime(f"{no_meetings_before}:00", "%H:%M")
233+
nb = int(no_meetings_before)
234+
no_meetings_before = datetime.strptime(f"{nb}:00", "%H:%M")
228235
if start_time < no_meetings_before:
229236
return {'time_restrictions_programmatic_check': 0}
230237

231238
if no_meetings_after:
232-
no_meetings_after = datetime.strptime(f"{no_meetings_after}:00", '%H:%M')
239+
na = int(no_meetings_after)
240+
no_meetings_after = datetime.strptime(f"{na}:00", '%H:%M')
233241
if end_time > no_meetings_after:
234242
return {'time_restrictions_programmatic_check': 0}
235243
return {'time_restrictions_programmatic_check': 1}
@@ -290,4 +298,61 @@ def check_specific_times_programmatic(self, instance, solution):
290298
result = 0
291299
else:
292300
result = 1
293-
return {'specific_times_programmatic_check': result}
301+
return {'specific_times_programmatic_check': result}
302+
303+
def compute_constrainedness_programmatic(self, instance):
304+
"""
305+
Compute the constrainedness of the problem based on the constraints and availability.
306+
The constrainedness is defined as (1 - the ratio of feasible slots to total slots).
307+
The higher the constrainedness, the more constrained the problem is.
308+
"""
309+
params = instance['params']
310+
constraints = instance['constraints']
311+
metadata = instance['metadata']
312+
if not instance['constraints']['buffer_time_before_and_after_meeting']:
313+
buffer_time_before_and_after_meeting = 0
314+
else:
315+
buffer_time_before_and_after_meeting = instance['constraints']['buffer_time_before_and_after_meeting']
316+
total_slots = 0
317+
feasible_slots = 0
318+
for day in params['days_of_week']:
319+
common_time_slots = None
320+
union_time_slots = None
321+
availability = json.loads(metadata['availability'].replace("'", '"'))
322+
for participant, schedule in availability.items():
323+
if day in schedule:
324+
participant_time_slots = []
325+
participant_time_slots_unconstrained = []
326+
for time_slot in schedule[day]:
327+
start_time, end_time = parse_time_block(time_slot)
328+
time_slots = generate_time_slots(start_time, end_time, params['granularity'])
329+
time_slots = filter_slots_by_duration(time_slots, constraints['meeting_duration'])
330+
participant_time_slots_unconstrained.extend(time_slots)
331+
time_slots = generate_time_slots(start_time, end_time, params['granularity'])
332+
time_slots = filter_slots_by_duration(time_slots, constraints['meeting_duration'] + buffer_time_before_and_after_meeting*2)
333+
time_slots = filter_slots_by_constraints(time_slots, constraints, day=day)
334+
participant_time_slots.extend(time_slots)
335+
if common_time_slots is None:
336+
common_time_slots = set(participant_time_slots)
337+
else:
338+
common_time_slots = common_time_slots.intersection(participant_time_slots)
339+
if union_time_slots is None:
340+
union_time_slots = set(participant_time_slots_unconstrained)
341+
else:
342+
union_time_slots = union_time_slots.union(participant_time_slots_unconstrained)
343+
if common_time_slots:
344+
feasible_slots +=len(common_time_slots)
345+
if union_time_slots:
346+
total_slots+=len(union_time_slots)
347+
348+
# Calculate constrainedness ratio
349+
if total_slots > 0:
350+
constrainedness_ratio = 1 - (feasible_slots / total_slots)
351+
else:
352+
constrainedness_ratio = 1
353+
354+
# Bucket the constrainedness ratio into intervals of 0.1
355+
constrainedness_bucket = round(math.floor(constrainedness_ratio / 0.1) * 0.1, 4)
356+
357+
# Add test result
358+
return {'constrainedness': constrainedness_ratio, 'constrainedness_bucket': constrainedness_bucket}

eureka_ml_insights/metrics/reports.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -131,7 +131,7 @@ def _aggregate(self, data):
131131
if len(data) == 0:
132132
averages = {col: 0 for col in self.column_names}
133133
else:
134-
averages = {col: data[col].mean().round(3) for col in self.column_names}
134+
averages = {col: round(data[col].mean(), 3) for col in self.column_names}
135135
self.aggregated_result = averages
136136

137137
def _aggregate_grouped(self, data):

eureka_ml_insights/user_configs/__init__.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,11 @@
88
AIME_PIPELINE512Run,
99
AIME_PIPELINE1024Run,
1010
)
11-
from .ba_calendar import BA_Calendar_Parallel_PIPELINE, BA_Calendar_PIPELINE
11+
from .ba_calendar import (
12+
BA_Calendar_PIPELINE,
13+
BA_Calendar_Parallel_PIPELINE,
14+
BA_Calendar_RunEvals_PIPELINE,
15+
)
1216
from .dna import DNA_PIPELINE
1317
from .drop import Drop_Experiment_Pipeline
1418
from .flenqa import FlenQA_Experiment_Pipeline
@@ -120,6 +124,7 @@
120124
DNA_PIPELINE,
121125
BA_Calendar_PIPELINE,
122126
BA_Calendar_Parallel_PIPELINE,
127+
BA_Calendar_RunEvals_PIPELINE,
123128
ToxiGen_Discriminative_PIPELINE,
124129
ToxiGen_Generative_PIPELINE,
125130
Geo_Nondeterminism,

0 commit comments

Comments
 (0)