llvm-lnt/lnt/testing/__init__.py at c568c9c146e610aaacec55c2a907cd2d585fcd41 · llvm/llvm-lnt · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
"""
Utilities for working with the LNT test format.

Clients can easily generate LNT test format data by creating Report
objects for the runs they wish to submit, and using Report.render to
convert them to JSON data suitable for submitting to the server.
"""

import datetime
import json
import re
from lnt.util import logger

# We define the following constants for use as sample values by
# convention.
PASS = 0
FAIL = 1
XFAIL = 2


def normalize_time(t):
    if isinstance(t, float):
        t = datetime.datetime.utcfromtimestamp(t)
    elif not isinstance(t, datetime.datetime):
        t = datetime.datetime.strptime(t, '%Y-%m-%d %H:%M:%S')
    return t.strftime('%Y-%m-%d %H:%M:%S')


class Report:
    """Information on a single testing run.

    In the LNT test model, every test run should define exactly one
    machine and run, and any number of test samples.
    """
    def __init__(self, machine, run, tests):
        """Construct a LNT report file."""
        self.machine = machine
        self.run = run
        self.tests = list(tests)
        self._check()

    def _check(self):
        """Check that object members are adequate to generate an LNT
        json report file.
        """
        assert isinstance(self.machine, Machine), "Unexpected type for machine."
        assert isinstance(self.run, Run), "Unexpected type for run."
        for t in self.tests:
            assert isinstance(t, Test), "Unexpected type for test"

    def update_report(self, new_tests_samples, end_time=None):
        """Add extra samples to this report, and update the end time of
        the run.
        """
        self._check()
        self.tests.extend(new_tests_samples)
        self.run.update_endtime(end_time)
        self._check()

    def render(self, indent=4):
        """Return a LNT json report file as a string, where each object is
        indented by spaces compared to its parent.
        """
        return json.dumps({
            'format_version': '2',
            'machine': self.machine.render(),
            'run': self.run.render(),
            'tests': [t.render() for t in self.tests]},
            sort_keys=True, indent=indent)


class Machine:
    """Information on the machine the test was run on.

    The info dictionary can be used to describe additional information
    about the machine, for example the hardware resources or the
    operating environment.

    Machines entries in the database are uniqued by their name and the
    entire contents of the info dictionary.
    """
    def __init__(self, name, info={}):
        self.name = str(name)
        self.info = dict((str(key), str(value))
                         for key, value in info.items())

    def render(self):
        """Return info from this instance in a dictionary that respects
        the LNT JSON report format.
        """
        d = dict(self.info)
        d['name'] = self.name
        return d


class Run:
    """Information on the particular test run.

    At least one parameter must be supplied and is used as ordering
    among several runs.

    As with Machine, the info dictionary can be used to describe
    additional information on the run. This dictionary should be used to
    describe information on the software-under-test that is constant
    across the test run, for example the revision number being tested.
    It can also be used to describe information about the current state
    which could be useful in analysis, for example the current machine
    load.
    """
    def __init__(self, start_time=None, end_time=None, info={}):
        self.start_time = normalize_time(start_time) if start_time is not None else None
        self.end_time = normalize_time(end_time) if end_time is not None else None
        self.info = dict()
        # Convert keys/values that are not json encodable to strings.
        for key, value in info.items():
            key = str(key)
            value = str(value)
            self.info[key] = value

        if 'llvm_project_revision' not in self.info:
            raise ValueError("Missing 'llvm_project_revision' entry in 'info' dictionary")

    def update_endtime(self, end_time=None):
        """Update the end time of this run."""
        self.end_time = normalize_time(end_time) if end_time else None

    def render(self):
        """Return info from this instance in a dictionary that respects
        the LNT JSON report format.
        """
        d = dict(self.info)
        if self.start_time is not None:
            d['start_time'] = self.start_time
        if self.end_time is not None:
            d['end_time'] = self.end_time
        return d


class Test:
    """Information on a particular test in the run and its associated
    samples.

    The server automatically creates test database objects whenever a
    new test name is seen.

    Test names are intended to be a persistent, recognizable identifier
    for what is being executed. Currently, most formats use some form of
    dotted notation for the test name, and this may become enshrined in
    the format in the future. In general, the test names should be
    independent of the software-under-test and refer to some known
    quantity, for example the software under test. For example,
    'CINT2006.403_gcc' is a meaningful test name.

    The test info dictionary is intended to hold information on the
    particular permutation of the test that was run. This might include
    variables specific to the software-under-test . This could include,
    for example, the compile flags the test was built with, or the
    runtime parameters that were used. As a general rule, if two test
    samples are meaningfully and directly comparable, then they should
    have the same test name but different info parameters.
    """

    def __init__(self, name, samples, info={}):
        self.name = name
        self.samples = samples
        self.info = dict()
        # Convert keys/values that are not json encodable to strings.
        for key, value in info.items():
            key = str(key)
            value = str(value)
            self.info[key] = value
        self.check()

    def check(self):
        """Check object members are adequate to generate an LNT json
        report file.
        """
        for s in self.samples:
            assert isinstance(s, MetricSamples), "Unexpected type for metric sample."

    def render(self):
        """Return info from this instance in a dictionary that respects
        the LNT JSON report format.
        """
        d = dict(self.info)
        d.update([s.render().popitem() for s in self.samples])
        d['name'] = self.name
        return d


class TestSamples:
    """Information on a given test and its associated samples data.

    Samples data must all relate to the same metric. When several
    metrics are available for a given test, the convention is to have
    one TestSamples per metric and to encode the metric into the name,
    e.g. Benchmark1.exec. The server automatically creates test database
    objects whenever a new test name is seen. TestSamples should only be
    used to generate report in version 1 or earlier of LNT JSON report
    file format.

    Test names are intended to be a persistent, recognizable identifier
    for what is being executed. Currently, most formats use some form of
    dotted notation for the test name, and this may become enshrined in
    the format in the future. In general, the test names should be
    independent of the software-under-test and refer to some known
    quantity, for example the software under test. For example,
    'CINT2006.403_gcc' is a meaningful test name.

    The test info dictionary is intended to hold information on the
    particular permutation of the test that was run. This might include
    variables specific to the software-under-test . This could include,
    for example, the compile flags the test was built with, or the
    runtime parameters that were used. As a general rule, if two test
    samples are meaningfully and directly comparable, then they should
    have the same test name but different info parameters.

    The report may include an arbitrary number of samples for each test
    for situations where the same test is run multiple times to gather
    statistical data.
    """

    def __init__(self, name, data, info={}, conv_f=float):
        """Create an instance representing the samples converted into
        floating-point values using the conv_f function.
        """
        self.name = str(name)
        self.info = dict((str(key), str(value))
                         for key, value in info.items())
        self.data = list(map(conv_f, data))

    def render(self):
        """Return info from this instance in a dictionary that respects
        the LNT report format in the version specified at construction
        when printed as json.
        """
        return {'Name': self.name,
                'Info': self.info,
                'Data': self.data}

    def __repr__(self):
        # TODO remove this
        return "TestSample({}): {} - {}".format(self.name,
                                                self.data,
                                                self.info)


class MetricSamples:
    """Samples data for a given metric of a given test.

    An arbitrary number of samples for a given metric is allowed for
    situations where the same metric is obtained several time for a
    given test to gather statistical data.
    """

    def __init__(self, metric, data, conv_f=float):
        self.metric = str(metric)
        self.data = list(map(conv_f, data))

    def add_samples(self, new_samples, conv_f=float):
        """Add samples for this metric, converted to float by calling
        function conv_f.
        """
        self.data.extend(map(conv_f, new_samples))

    def render(self):
        """Return info from this instance in a dictionary that respects
        the LNT report format in the version specified at construction
        when printed as json.
        """
        return {self.metric: self.data if len(self.data) > 1 else self.data[0]}


###
# Format Versioning

# We record information on the report "version" to allow the server to support
# some level of auto-upgrading data from submissions of older reports.
#
# We record the report version as a reserved key in the run information. When
# importing data, we detect the version of the report using the version number
# and we normalize it to the latest format so that the rest of the code only
# has to deal with the latest version at all times.
#
# Version 0 --           : initial (and unversioned).
#
# Version 1 -- 2012-04-12: run_order was changed to not be padded, and allow
# non-integral values.
#
# Version 2 -- 2017-06:  Revamped json format
#    - Directly uses lnt names (no 'info_key' names anymore)
#    - Flatten Machine.Info and Run.Info into the Machine and Run records
#    - One record for each test (not one record for test+metric) with one entry
#      for each metric.
def _get_format_version(data):
    format_version = data.get('format_version')
    if format_version is not None:
        return int(format_version)

    # Older versions had a Run.Info.__report_version__ field
    run = data.get('Run')
    if run is not None:
        info = run.get('Info')
        if info is not None:
            report_version = info.get('__report_version__', '0')
            return int(report_version)

    return None


def upgrade_0_to_1(data):
    # We recompute the run_order here if it looks like this run_order was
    # derived (we presume from sniffing a compiler).
    run_info = data['Run']['Info']
    run_order = run_info.get('run_order')
    inferred_run_order = run_info.get('inferred_run_order')

    # If the run order is missing, or wasn't the inferred one, do nothing.
    if run_order is None or (run_order != inferred_run_order and
                             inferred_run_order is not None):
        return data

    # Otherwise, assume this run order was derived.

    # Trim whitespace.
    run_order = run_order.strip()
    run_info['run_order'] = run_info['inferred_run_order'] = run_order

    # If this was a production Clang build, try to recompute the src tag.
    if 'clang' in run_info.get('cc_name', '') and \
            run_info.get('cc_build') == 'PROD' and \
            run_info.get('cc_src_tag') and \
            run_order == run_info['cc_src_tag'].strip():
        # Extract the version line.
        version_ln = None
        for ln in run_info.get('cc_version', '').split('\n'):
            if ' version ' in ln:
                version_ln = ln
                break
        else:
            # We are done if we didn't find one.
            return data

        # Extract the build string.
        m = re.match(r'(.*) version ([^ ]*) (\([^(]*\))(.*)',
                     version_ln)
        if not m:
            return data

        cc_name, cc_version_num, cc_build_string, cc_extra = m.groups()
        m = re.search('clang-([0-9.]*)', cc_build_string)
        if m:
            run_info['run_order'] = run_info['inferred_run_order'] = \
                run_info['cc_src_tag'] = m.group(1)
    data['Run']['Info']['__report_version__'] = "1"
    return data


# Upgrading from version 1 to version 2 needs some schema in place
class _UpgradeSchema(object):
    def __init__(self, metric_rename, machine_param_rename, run_param_rename):
        self.metric_rename = metric_rename
        self.machine_param_rename = machine_param_rename
        self.run_param_rename = run_param_rename


_nts_upgrade = _UpgradeSchema(
    metric_rename={
        '.code_size': 'code_size',
        '.compile': 'compile_time',
        '.compile.status': 'compile_status',
        '.exec': 'execution_time',
        '.exec.status': 'execution_status',
        '.hash': 'hash',
        '.hash.status': 'hash_status',
        '.mem': 'mem_bytes',
        '.score': 'score',
        '.profile': 'profile',
    }, machine_param_rename={
        'name': 'hostname',  # Avoid name clash with actual machine name.
    }, run_param_rename={
        'run_order': 'llvm_project_revision',
    }
)
_compile_upgrade = _UpgradeSchema(
    metric_rename={
        '.mem': 'mem_bytes',
        '.mem.status': 'mem_status',
        '.size': 'size_bytes',
        '.size.status': 'size_status',
        '.sys': 'sys_time',
        '.sys.status': 'sys_status',
        '.user': 'user_time',
        '.user.status': 'user_status',
        '.wall': 'wall_time',
        '.wall.status': 'wall_status',
    }, machine_param_rename={
        'hw.model': 'hardware',
        'kern.version': 'os_version',
        'name': 'hostname',
    }, run_param_rename={
        'run_order': 'llvm_project_revision',
    }
)
_default_upgrade = _UpgradeSchema(
    metric_rename={},
    machine_param_rename={},
    run_param_rename={
        'run_order': 'llvm_project_revision',
    }
)
_upgrades = {
    'nts': _nts_upgrade,
    'compile': _compile_upgrade
}


def upgrade_1_to_2(data, ts_name):
    result = dict()

    # Pull version and database schema to toplevel
    result['format_version'] = '2'
    report_version = data['Run']['Info'].pop('__report_version__', '1')
    # We should not be in upgrade_1_to_2 for other versions
    assert report_version == '1'
    tag = data['Run']['Info'].pop('tag', None)
    if tag is not None and tag != ts_name:
        raise ValueError("Importing '%s' data into '%s' testsuite" %
                         (tag, ts_name))

    upgrade = _upgrades.get(tag)
    if upgrade is None:
        logger.warning("No upgrade schema known for '%s'\n" % tag)
        upgrade = _default_upgrade

    # Flatten Machine.Info into machine
    Machine = data['Machine']
    result_machine = {'name': Machine['Name']}
    for key, value in Machine['Info'].items():
        newname = upgrade.machine_param_rename.get(key, key)
        if newname in result_machine:
            raise ValueError("Name clash for machine info '%s'" % newname)
        result_machine[newname] = value
    result['machine'] = result_machine

    # Flatten Result.Info into result
    Run = data['Run']
    result_run = {}
    start_time = Run.get('Start Time')
    if start_time is not None:
        result_run['start_time'] = start_time
    end_time = Run.get('End Time')
    if end_time is not None:
        result_run['end_time'] = end_time
    for key, value in Run['Info'].items():
        newname = upgrade.run_param_rename.get(key, key)
        if newname in result_run:
            raise ValueError("Name clash for run info '%s'" % newname)
        result_run[newname] = value
    result['run'] = result_run

    # Merge tests
    result_tests = list()
    result_tests_dict = dict()
    Tests = data['Tests']
    for test in Tests:
        test_Name = test['Name']

        # Old testnames always started with 'tag.', split that part.
        if len(test['Info']) != 0:
            # The Info record didn't work with the v4 database anyway...
            raise ValueError("Tests/%s: cannot convert non-empty Info record" %
                             test_Name)
        tag_dot = '%s.' % ts_name
        if not test_Name.startswith(tag_dot):
            raise ValueError("Tests/%s: test name does not start with '%s'" %
                             (test_Name, tag_dot))
        name_metric = test_Name[len(tag_dot):]

        found_metric = False
        for oldname, newname in upgrade.metric_rename.items():
            assert oldname.startswith('.')
            if name_metric.endswith(oldname):
                name = name_metric[:-len(oldname)]
                metric = newname
                found_metric = True
                break
        if not found_metric:
            # Fallback logic for unknown metrics: Assume they are '.xxxx'
            name, dot, metric = name_metric.rpartition('.')
            if dot != '.':
                raise ValueError("Tests/%s: name does not end in .metric" %
                                 test_Name)
            logger.warning("Found unknown metric '%s'" % metric)
            upgrade.metric_rename['.'+metric] = metric

        result_test = result_tests_dict.get(name)
        if result_test is None:
            result_test = {'name': name}
            result_tests_dict[name] = result_test
            result_tests.append(result_test)

        data = test['Data']
        if metric not in result_test:
            # Do not construct a list for the very common case of just a
            # single datum.
            if len(data) == 1:
                data = data[0]
            result_test[metric] = data
        elif len(data) > 0:
            # Transform the test data into a list
            if not isinstance(result_test[metric], list):
                result_test[metric] = [result_test[metric]]
            result_test[metric] += data

    result['tests'] = result_tests
    return result


def upgrade_and_normalize_report(data, ts_name):
    # Get the report version. V2 has it at the top level, older version
    # in Run.Info.
    format_version = _get_format_version(data)
    if format_version is None:
        data['format_version'] = '2'
        format_version = 2

    if format_version == 0:
        data = upgrade_0_to_1(data)
        format_version = 1
    if format_version == 1:
        data = upgrade_1_to_2(data, ts_name)
        format_version = 2

    if format_version != 2 or data['format_version'] != '2':
        raise ValueError("Unknown format version")
    if 'run' not in data:
        import pprint
        logger.info(pprint.pformat(data))
        raise ValueError("No 'run' section in submission")
    if 'machine' not in data:
        raise ValueError("No 'machine' section in submission")
    if 'tests' not in data:
        raise ValueError("No 'tests' section in submission")

    run = data['run']
    if 'start_time' not in run:
        time = datetime.datetime.utcnow().replace(microsecond=0).isoformat()
        run['start_time'] = time
        run['end_time'] = time
    elif 'end_time' not in run:
        run['end_time'] = run['start_time']

    return data


__all__ = ['Report', 'Machine', 'Run', 'TestSamples']