Commit b5a41d9

Ignore execution_time/score regressions when binaries have same hash (#140)
We can remove a lot of the noise from execution time results by ignoring changes when the binary hashes are the same. This is implemented by adding an ignore_same_hash column to the TestSuiteSampleFields table, similar to bigger_is_better, and setting it to true for execution_time and score. We still want to flag regressions on identical binaries for metrics such as compile time, so the flag stays off for those. This was my first time poking around the migrations infrastructure; migrations are applied automatically whenever the server is launched, so there shouldn't be any need to run any scripts manually.
1 parent 1f1f380 commit b5a41d9
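
With ignore_same_hash set on a metric, a delta between two runs built from the same binary is reported as unchanged, while the same delta across different binaries is still flagged. A minimal sketch of that behavior using the ComparisonResult constructor as extended in this commit (the sample values and the 'abc'/'123' hashes are made up; the import mirrors the updated test file below):

from lnt.server.reporting.analysis import (ComparisonResult, REGRESSED,
                                           UNCHANGED_PASS)

# An apparent slowdown from 5s to 10s, but both runs carry the same binary
# hash, so the change is suppressed for a hash-sensitive metric.
same_binary = ComparisonResult(min, False, False, [10.], [5.],
                               'abc', 'abc', ignore_same_hash=True)
assert same_binary.get_value_status() == UNCHANGED_PASS

# The identical slowdown across two different binaries is still a regression.
new_binary = ComparisonResult(min, False, False, [10.], [5.],
                              'abc', '123', ignore_same_hash=True)
assert new_binary.get_value_status() == REGRESSED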

File tree: 6 files changed (+81, -8 lines)

Lines changed: 29 additions & 0 deletions
@@ -0,0 +1,29 @@
+"""Adds an ignore_same_hash column to the sample fields table and sets it to
+true for execution_time.
+
+"""
+
+from sqlalchemy import Column, Integer, update
+
+from lnt.server.db.migrations.util import introspect_table
+from lnt.server.db.util import add_column
+
+
+def upgrade(engine):
+    ignore_same_hash = Column("ignore_same_hash", Integer, default=0)
+    add_column(engine, "TestSuiteSampleFields", ignore_same_hash)
+
+    test_suite_sample_fields = introspect_table(engine, "TestSuiteSampleFields")
+    set_init_value = update(test_suite_sample_fields).values(ignore_same_hash=0)
+    set_exec_time = (
+        update(test_suite_sample_fields)
+        .where(
+            (test_suite_sample_fields.c.Name == "execution_time") |
+            (test_suite_sample_fields.c.Name == "score")
+        )
+        .values(ignore_same_hash=1)
+    )
+
+    with engine.begin() as trans:
+        trans.execute(set_init_value)
+        trans.execute(set_exec_time)
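
Migrations like this one are applied automatically the next time the LNT server starts, as the commit message notes. Purely as an illustration, upgrade() could also be driven by hand against an existing instance database; a rough sketch, where both the module name upgrade_X_to_Y and the database path are placeholders rather than values taken from this commit:

from sqlalchemy import create_engine

# Placeholder module name: the migration script's real file name is not shown
# in this diff; LNT migrations typically follow an upgrade_<from>_to_<to> naming.
from lnt.server.db.migrations import upgrade_X_to_Y as migration

# Placeholder path to an LNT instance database.
engine = create_engine("sqlite:////path/to/lnt/instance/data/lnt.db")
migration.upgrade(engine)  # adds the column and enables it for execution_time/score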

lnt/server/db/testsuite.py

Lines changed: 11 additions & 1 deletion
@@ -173,6 +173,7 @@ def from_json(data):
         for index, metric_desc in enumerate(data['metrics']):
             name = metric_desc['name']
             bigger_is_better = metric_desc.get('bigger_is_better', False)
+            ignore_same_hash = metric_desc.get('ignore_same_hash', False)
             metric_type_name = metric_desc.get('type', 'Real')
             display_name = metric_desc.get('display_name')
             unit = metric_desc.get('unit')
@@ -182,8 +183,10 @@ def from_json(data):
                                  metric_type_name)
             metric_type = SampleType(metric_type_name)
             bigger_is_better_int = 1 if bigger_is_better else 0
+            ignore_same_hash_int = 1 if ignore_same_hash else 0
             field = SampleField(name, metric_type, index, status_field=None,
                                 bigger_is_better=bigger_is_better_int,
+                                ignore_same_hash=ignore_same_hash_int,
                                 display_name=display_name, unit=unit,
                                 unit_abbrev=unit_abbrev)
             sample_fields.append(field)
@@ -196,6 +199,7 @@ def __json__(self):
         for sample_field in self.sample_fields:
             metric = {
                 'bigger_is_better': (sample_field.bigger_is_better != 0),
+                'ignore_same_hash': (sample_field.ignore_same_hash != 0),
                 'display_name': sample_field.display_name,
                 'name': sample_field.name,
                 'type': sample_field.type.name,
@@ -340,12 +344,18 @@ class SampleField(FieldMixin, Base):
     # This assumption can be inverted by setting this column to nonzero.
     bigger_is_better = Column("bigger_is_better", Integer)
 
+    # Some fields like execution_time should ignore changes if the binary hash
+    # is the same.
+    ignore_same_hash = Column("ignore_same_hash", Integer, default=0)
+
     def __init__(self, name, type, schema_index, status_field=None, bigger_is_better=0,
+                 ignore_same_hash=0,
                  display_name=None, unit=None, unit_abbrev=None):
         self.name = name
         self.type = type
         self.status_field = status_field
         self.bigger_is_better = bigger_is_better
+        self.ignore_same_hash = ignore_same_hash
         self.display_name = name if display_name is None else display_name
         self.unit = unit
         self.unit_abbrev = unit_abbrev
@@ -367,7 +377,7 @@ def __repr__(self):
 
     def __copy__(self):
         return SampleField(self.name, self.type, self.schema_index, self.status_field,
-                           self.bigger_is_better, self.display_name, self.unit,
+                           self.bigger_is_better, self.ignore_same_hash, self.display_name, self.unit,
                            self.unit_abbrev)
 
     def copy_info(self, other):
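
The flag follows the same path through the schema code as bigger_is_better: from_json() reads an optional boolean from each metric description and stores it as an integer column, and __json__() exposes it back as a boolean. A small standalone sketch of that conversion (the metric dict is illustrative, not taken from a real schema file):

# A metric entry such as might appear under 'metrics' in a schema description.
metric_desc = {'name': 'execution_time', 'type': 'Real', 'ignore_same_hash': True}

# Missing keys default to False, matching from_json() above; the value is
# stored as an Integer column, like bigger_is_better.
ignore_same_hash = metric_desc.get('ignore_same_hash', False)
ignore_same_hash_int = 1 if ignore_same_hash else 0

# __json__() converts the stored integer back to a boolean.
assert (ignore_same_hash_int != 0) is True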

lnt/server/reporting/analysis.py

Lines changed: 15 additions & 7 deletions
@@ -54,7 +54,8 @@ class ComparisonResult:
     def __init__(self, aggregation_fn,
                  cur_failed, prev_failed, samples, prev_samples,
                  cur_hash, prev_hash, cur_profile=None, prev_profile=None,
-                 confidence_lv=0.05, bigger_is_better=False):
+                 confidence_lv=0.05, bigger_is_better=False,
+                 ignore_same_hash=False):
         self.aggregation_fn = aggregation_fn
 
         # Special case: if we're using the minimum to aggregate, swap it for
@@ -103,6 +104,7 @@ def __init__(self, aggregation_fn,
 
         self.confidence_lv = confidence_lv
         self.bigger_is_better = bigger_is_better
+        self.ignore_same_hash = ignore_same_hash
 
     def __repr__(self):
         """Print this ComparisonResult's constructor.
@@ -118,7 +120,8 @@ def __repr__(self):
                 self.samples,
                 self.prev_samples,
                 self.confidence_lv,
-                bool(self.bigger_is_better))
+                bool(self.bigger_is_better),
+                bool(self.ignore_same_hash))
 
     def __json__(self):
         simple_dict = self.__dict__
@@ -155,9 +158,6 @@ def get_test_status(self):
         else:
             return UNCHANGED_PASS
 
-    # FIXME: take into account hash of binary - if available. If the hash is
-    # the same, the binary is the same and therefore the difference cannot be
-    # significant - for execution time. It can be significant for compile time.
     def get_value_status(self, confidence_interval=2.576,
                          value_precision=MIN_VALUE_PRECISION,
                          ignore_small=True):
@@ -176,6 +176,12 @@ def get_value_status(self, confidence_interval=2.576,
         elif self.prev_failed:
             return UNCHANGED_PASS
 
+        # Ignore changes if the hash of the binary is the same and the field is
+        # sensitive to the hash, e.g. execution time.
+        if self.ignore_same_hash:
+            if self.cur_hash and self.prev_hash and self.cur_hash == self.prev_hash:
+                return UNCHANGED_PASS
+
         # Always ignore percentage changes below MIN_PERCENTAGE_CHANGE %, for now, we just don't
         # have enough time to investigate that level of stuff.
         if ignore_small and abs(self.pct_delta) < MIN_PERCENTAGE_CHANGE:
@@ -355,7 +361,8 @@ def get_comparison_result(self, runs, compare_runs, test_id, field,
                              prev_values, cur_hash, prev_hash,
                              cur_profile, prev_profile,
                              self.confidence_lv,
-                             bigger_is_better=field.bigger_is_better)
+                             bigger_is_better=field.bigger_is_better,
+                             ignore_same_hash=field.ignore_same_hash)
         return r
 
     def get_geomean_comparison_result(self, run, compare_to, field, tests):
@@ -385,7 +392,8 @@ def get_geomean_comparison_result(self, run, compare_to, field, tests):
                                 cur_hash=cur_hash,
                                 prev_hash=prev_hash,
                                 confidence_lv=0,
-                                bigger_is_better=field.bigger_is_better)
+                                bigger_is_better=field.bigger_is_better,
+                                ignore_same_hash=field.ignore_same_hash)
 
     def _load_samples_for_runs(self, session, run_ids, only_tests):
         # Find the set of new runs to load.

schemas/nts.yaml

Lines changed: 2 additions & 0 deletions
@@ -16,12 +16,14 @@ metrics:
     display_name: Execution Time
     unit: seconds
     unit_abbrev: s
+    ignore_same_hash: true
   - name: execution_status
     type: Status
   - name: score
     type: Real
     bigger_is_better: true
     display_name: Score
+    ignore_same_hash: true
   - name: mem_bytes
     type: Real
     display_name: Memory Usage

tests/server/reporting/analysis.py

Lines changed: 22 additions & 0 deletions
@@ -319,6 +319,28 @@ def test_handle_zero_sample(self):
                                        None, None)
         self.assertEqual(zeroSample.get_value_status(), UNCHANGED_PASS)
 
+    def test_ignore_same_hash(self):
+        """Test ignore_same_hash ignores regressions with the same hash."""
+        same_hash = ComparisonResult(min, False, False, [10.], [5.],
+                                     'abc', 'abc', ignore_same_hash=True)
+        self.assertEqual(same_hash.get_value_status(), UNCHANGED_PASS)
+        self.assertFalse(same_hash.is_result_interesting())
+
+        diff_hash = ComparisonResult(min, False, False, [10.], [5.],
+                                     'abc', '123', ignore_same_hash=True)
+        self.assertEqual(diff_hash.get_value_status(), REGRESSED)
+        self.assertTrue(diff_hash.is_result_interesting())
+
+        no_hash = ComparisonResult(min, False, False, [10.], [5.], None,
+                                   '123', ignore_same_hash=True)
+        self.assertEqual(no_hash.get_value_status(), REGRESSED)
+        self.assertTrue(no_hash.is_result_interesting())
+
+        disabled = ComparisonResult(min, False, False, [10.], [5.],
+                                    'abc', 'abc', ignore_same_hash=False)
+        self.assertEqual(disabled.get_value_status(), REGRESSED)
+        self.assertTrue(disabled.is_result_interesting())
+
 
 class AbsMinTester(unittest.TestCase):
 
tests/server/ui/test_api.py

Lines changed: 2 additions & 0 deletions
@@ -290,6 +290,8 @@ def test_schema(self):
                 m['display_name'] = m['name']
             if 'bigger_is_better' not in m:
                 m['bigger_is_better'] = False
+            if 'ignore_same_hash' not in m:
+                m['ignore_same_hash'] = False
         yaml_schema['metrics'].sort(key=lambda x: x['name'])
         yaml_schema['run_fields'].sort(key=lambda x: x['name'])
         yaml_schema['machine_fields'].sort(key=lambda x: x['name'])
