
Commit 73cbd98

Convert Location to a namedtuple, and associated cleanup (#205)
This change converts the `Location` dataclass, which does not change frequently, into a new `SourceLocation` namedtuple, and changes the `SourceLocation` serialization. As a result, with this change:

* `embossc` runs about 25% faster on a large (7kLOC) input; `python3 -OO emboss` runs about 19% faster on the same input.
* Serialized IR is about 45% smaller.

Details:

* Replace the `ir_data.Location` dataclass with a new `parser_types.SourceLocation` namedtuple. The rename helps clarify the difference between a location within source code (`SourceLocation`) and a location within a structure (`FieldLocation`).
* Similarly, replace `ir_data.Position` with `parser_types.SourcePosition`.
* Update any place that edits a `SourceLocation` with an appropriate assignment; e.g., `x.source_location.end = y` becomes `x.source_location = x.source_location._replace(end=y)`. In most cases, several fields were updated consecutively; those updates have been merged.
* Update the JSON serialization to use the compact format.
* Replace `format_location()` and `format_position()` with `__str__()` methods on `SourceLocation` and `SourcePosition`, respectively.
* Replace `parse_location()` and `parse_position()` with `from_str()` class methods on `SourceLocation` and `SourcePosition`, respectively.
* Move the `make_location()` functionality into `SourceLocation.__new__()`.
* Update `_to_dict` and `_from_dict` in `IrDataSerializer` to stringify and destringify `SourceLocation`. It is tempting to try to do this during the JSON serialization step (with a `default=` parameter to `json.dumps` and an `object_hook=` parameter to `json.loads`), but it is tricky to get the `object_hook` to know when to convert.
1 parent 46423da commit 73cbd98

29 files changed (+780, -1523 lines)
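To make the new API concrete, here is a minimal illustrative sketch of what a namedtuple-based `SourcePosition`/`SourceLocation` with `__str__`, `from_str`, and a coercing `__new__` could look like. This is not the actual `compiler/util/parser_types.py` code; the field set, the `is_synthetic` default, and the `line:column-line:column` text format are assumptions inferred from the commit message and from the `"0:0-0:0"` literal in the `lr1.py` diff below.

```python
# Illustrative sketch only -- not the real parser_types implementation.
import collections


class SourcePosition(collections.namedtuple("SourcePosition", ["line", "column"])):
    """A (line, column) position within a source file."""

    def __str__(self):
        return f"{self.line}:{self.column}"

    @classmethod
    def from_str(cls, text):
        # Assumed format: "line:column".
        line, column = text.split(":")
        return cls(int(line), int(column))


class SourceLocation(
    collections.namedtuple("SourceLocation", ["start", "end", "is_synthetic"])
):
    """A start/end span within a source file."""

    def __new__(cls, start, end, is_synthetic=False):
        # Accept bare (line, column) tuples, as the updated call sites below do;
        # this is the role make_location() used to play.
        return super().__new__(
            cls, SourcePosition(*start), SourcePosition(*end), is_synthetic
        )

    def __str__(self):
        return f"{self.start}-{self.end}"

    @classmethod
    def from_str(cls, text):
        # Assumed format: "line:column-line:column", e.g. "0:0-0:0".
        start, end = text.split("-")
        return cls(SourcePosition.from_str(start), SourcePosition.from_str(end))
```

With a shape like this, in-place edits such as `x.source_location.end = y` necessarily become reassignments, `x.source_location = x.source_location._replace(end=y)`, which is the pattern visible throughout the diffs below.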

compiler/back_end/cpp/header_generator.py

Lines changed: 8 additions & 3 deletions
```diff
@@ -1747,9 +1747,14 @@ def _offset_source_location_column(source_location, offset):
     original start column.
     """

-    new_location = ir_data_utils.copy(source_location)
-    new_location.start.column = source_location.start.column + offset[0]
-    new_location.end.column = source_location.start.column + offset[1]
+    new_location = source_location._replace(
+        start=source_location.start._replace(
+            column=source_location.start.column + offset[0]
+        ),
+        end=source_location.start._replace(
+            column=source_location.start.column + offset[1]
+        ),
+    )

     return new_location

```
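Because namedtuples are immutable, the nested update above has to rebuild the outer tuple instead of assigning through it. A tiny standalone illustration (toy names, not Emboss code):

```python
import collections

Point = collections.namedtuple("Point", ["line", "column"])
Span = collections.namedtuple("Span", ["start", "end"])

span = Span(start=Point(1, 5), end=Point(1, 9))
# span.start.column = 6  # would raise AttributeError: namedtuples are immutable
span = span._replace(start=span.start._replace(column=6))  # rebuild and rebind
assert span.start == Point(1, 6)
assert span.end == Point(1, 9)
```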

compiler/back_end/cpp/header_generator_test.py

Lines changed: 49 additions & 42 deletions
```diff
@@ -126,12 +126,11 @@ def test_rejects_bad_enum_case_at_start(self):
         )
         attr = ir.module[0].type[0].attribute[0]

-        bad_case_source_location = ir_data.Location()
-        bad_case_source_location = ir_data_utils.builder(bad_case_source_location)
-        bad_case_source_location.CopyFrom(attr.value.source_location)
-        # Location of SHORTY_CASE in the attribute line.
-        bad_case_source_location.start.column = 30
-        bad_case_source_location.end.column = 41
+        # SourceLocation of SHORTY_CASE in the attribute line.
+        bad_case_source_location = attr.value.source_location._replace(
+            start=attr.value.source_location.start._replace(column=30),
+            end=attr.value.source_location.end._replace(column=41),
+        )

         self.assertEqual(
             [
@@ -156,12 +155,11 @@ def test_rejects_bad_enum_case_in_middle(self):
         )
         attr = ir.module[0].type[0].attribute[0]

-        bad_case_source_location = ir_data.Location()
-        bad_case_source_location = ir_data_utils.builder(bad_case_source_location)
-        bad_case_source_location.CopyFrom(attr.value.source_location)
-        # Location of bad_CASE in the attribute line.
-        bad_case_source_location.start.column = 43
-        bad_case_source_location.end.column = 51
+        # SourceLocation of bad_CASE in the attribute line.
+        bad_case_source_location = attr.value.source_location._replace(
+            start=attr.value.source_location.start._replace(column=43),
+            end=attr.value.source_location.end._replace(column=51),
+        )

         self.assertEqual(
             [
@@ -186,12 +184,11 @@ def test_rejects_bad_enum_case_at_end(self):
         )
         attr = ir.module[0].type[0].attribute[0]

-        bad_case_source_location = ir_data.Location()
-        bad_case_source_location = ir_data_utils.builder(bad_case_source_location)
-        bad_case_source_location.CopyFrom(attr.value.source_location)
-        # Location of BAD_case in the attribute line.
-        bad_case_source_location.start.column = 55
-        bad_case_source_location.end.column = 63
+        # SourceLocation of BAD_case in the attribute line.
+        bad_case_source_location = attr.value.source_location._replace(
+            start=attr.value.source_location.start._replace(column=55),
+            end=attr.value.source_location.end._replace(column=63),
+        )

         self.assertEqual(
             [
@@ -216,12 +213,11 @@ def test_rejects_duplicate_enum_case(self):
        )
        attr = ir.module[0].type[0].attribute[0]

-        bad_case_source_location = ir_data.Location()
-        bad_case_source_location = ir_data_utils.builder(bad_case_source_location)
-        bad_case_source_location.CopyFrom(attr.value.source_location)
-        # Location of the second SHOUTY_CASE in the attribute line.
-        bad_case_source_location.start.column = 43
-        bad_case_source_location.end.column = 54
+        # SourceLocation of the second SHOUTY_CASE in the attribute line.
+        bad_case_source_location = attr.value.source_location._replace(
+            start=attr.value.source_location.start._replace(column=43),
+            end=attr.value.source_location.end._replace(column=54),
+        )

         self.assertEqual(
             [
@@ -246,12 +242,11 @@ def test_rejects_empty_enum_case(self):
         )
         attr = ir.module[0].type[0].attribute[0]

-        bad_case_source_location = ir_data.Location()
-        bad_case_source_location = ir_data_utils.builder(bad_case_source_location)
-        bad_case_source_location.CopyFrom(attr.value.source_location)
-        # Location of excess comma.
-        bad_case_source_location.start.column = 42
-        bad_case_source_location.end.column = 42
+        # SourceLocation of excess comma.
+        bad_case_source_location = attr.value.source_location._replace(
+            start=attr.value.source_location.start._replace(column=42),
+            end=attr.value.source_location.end._replace(column=42),
+        )

         self.assertEqual(
             [
@@ -274,8 +269,10 @@ def test_rejects_empty_enum_case(self):
             " BAZ = 2\n"
         )

-        bad_case_source_location.start.column = 30
-        bad_case_source_location.end.column = 30
+        bad_case_source_location = attr.value.source_location._replace(
+            start=attr.value.source_location.start._replace(column=30),
+            end=attr.value.source_location.end._replace(column=30),
+        )

         self.assertEqual(
             [
@@ -298,8 +295,10 @@ def test_rejects_empty_enum_case(self):
             " BAZ = 2\n"
         )

-        bad_case_source_location.start.column = 54
-        bad_case_source_location.end.column = 54
+        bad_case_source_location = attr.value.source_location._replace(
+            start=attr.value.source_location.start._replace(column=54),
+            end=attr.value.source_location.end._replace(column=54),
+        )

         self.assertEqual(
             [
@@ -322,8 +321,10 @@ def test_rejects_empty_enum_case(self):
             " BAZ = 2\n"
         )

-        bad_case_source_location.start.column = 45
-        bad_case_source_location.end.column = 45
+        bad_case_source_location = attr.value.source_location._replace(
+            start=attr.value.source_location.start._replace(column=45),
+            end=attr.value.source_location.end._replace(column=45),
+        )

         self.assertEqual(
             [
@@ -346,8 +347,10 @@ def test_rejects_empty_enum_case(self):
             " BAZ = 2\n"
         )

-        bad_case_source_location.start.column = 30
-        bad_case_source_location.end.column = 30
+        bad_case_source_location = attr.value.source_location._replace(
+            start=attr.value.source_location.start._replace(column=30),
+            end=attr.value.source_location.end._replace(column=30),
+        )

         self.assertEqual(
             [
@@ -370,8 +373,10 @@ def test_rejects_empty_enum_case(self):
             " BAZ = 2\n"
        )

-        bad_case_source_location.start.column = 35
-        bad_case_source_location.end.column = 35
+        bad_case_source_location = attr.value.source_location._replace(
+            start=attr.value.source_location.start._replace(column=35),
+            end=attr.value.source_location.end._replace(column=35),
+        )

         self.assertEqual(
             [
@@ -394,8 +399,10 @@ def test_rejects_empty_enum_case(self):
             " BAZ = 2\n"
         )

-        bad_case_source_location.start.column = 31
-        bad_case_source_location.end.column = 31
+        bad_case_source_location = attr.value.source_location._replace(
+            start=attr.value.source_location.start._replace(column=31),
+            end=attr.value.source_location.end._replace(column=31),
+        )

         self.assertEqual(
             [
```
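Note that each sub-case above now recomputes `bad_case_source_location` from `attr.value.source_location` instead of reusing a mutated builder: `_replace` returns a fresh tuple and leaves the original untouched. A toy illustration (not Emboss code):

```python
import collections

Loc = collections.namedtuple("Loc", ["start", "end"])

original = Loc(start=30, end=41)
updated = original._replace(start=43, end=54)
assert original == Loc(30, 41)  # the original value is unchanged
assert updated == Loc(43, 54)   # so each test case rebuilds its own location
```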

compiler/front_end/glue.py

Lines changed: 1 addition & 1 deletion
```diff
@@ -203,7 +203,7 @@ def parse_module(file_name, file_reader):
     """
     source_code, errors = file_reader(file_name)
     if errors:
-        location = parser_types.make_location((1, 1), (1, 1))
+        location = parser_types.SourceLocation((1, 1), (1, 1))
         return (
             None,
             None,
```

compiler/front_end/glue_test.py

Lines changed: 10 additions & 4 deletions
```diff
@@ -24,7 +24,7 @@
 from compiler.util import parser_types
 from compiler.util import test_util

-_location = parser_types.make_location
+_location = parser_types.SourceLocation

 _ROOT_PACKAGE = "testdata.golden"
 _GOLDEN_PATH = ""
@@ -232,7 +232,9 @@ def test_synthetic_error(self):
         self.assertFalse(errors)
         # Artificially mark the first field as is_synthetic.
         first_field = ir.module[0].type[0].structure.field[0]
-        first_field.source_location.is_synthetic = True
+        first_field.source_location = first_field.source_location._replace(
+            is_synthetic=True
+        )
         ir, errors = glue.process_ir(ir, None)
         self.assertTrue(errors)
         self.assertEqual(
@@ -259,8 +261,12 @@ def test_suppressed_synthetic_error(self):
         self.assertFalse(errors)
         # Artificially mark the name of the second field as is_synthetic.
         second_field = ir.module[0].type[0].structure.field[1]
-        second_field.name.source_location.is_synthetic = True
-        second_field.name.name.source_location.is_synthetic = True
+        second_field.name.source_location = second_field.name.source_location._replace(
+            is_synthetic=True
+        )
+        second_field.name.name.source_location = (
+            second_field.name.name.source_location._replace(is_synthetic=True)
+        )
         ir, errors = glue.process_ir(ir, None)
         self.assertEqual(1, len(errors))
         self.assertEqual("Duplicate name 'field'", errors[0][0].message)
```

compiler/front_end/lr1.py

Lines changed: 4 additions & 21 deletions
```diff
@@ -140,7 +140,9 @@ def __str__(self):

 # ANY_TOKEN is used by mark_error as a "wildcard" token that should be replaced
 # by every other token.
-ANY_TOKEN = parser_types.Token(object(), "*", parser_types.parse_location("0:0-0:0"))
+ANY_TOKEN = parser_types.Token(
+    object(), "*", parser_types.SourceLocation.from_str("0:0-0:0")
+)


 class Reduction(
@@ -690,26 +692,7 @@ def state():
                 # children, setting the source_location to None in that case.
                 start_position = None
                 end_position = None
-                for child in children:
-                    if (
-                        hasattr(child, "source_location")
-                        and child.source_location is not None
-                    ):
-                        start_position = child.source_location.start
-                        break
-                for child in reversed(children):
-                    if (
-                        hasattr(child, "source_location")
-                        and child.source_location is not None
-                    ):
-                        end_position = child.source_location.end
-                        break
-                if start_position is None:
-                    source_location = None
-                else:
-                    source_location = parser_types.make_location(
-                        start_position, end_position
-                    )
+                source_location = parser_types.merge_source_locations(*children)
                 reduction = Reduction(
                     next_action.rule.lhs, children, next_action.rule, source_location
                 )
```
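The loop removed above spells out what the new `parser_types.merge_source_locations` helper has to do: take the start of the first child that has a source location, the end of the last one, and return `None` when no child is located. A hypothetical sketch, reusing the illustrative `SourceLocation` from the top of this page; the real helper may be implemented differently:

```python
def merge_source_locations(*nodes):
    """Returns a location spanning the first and last located nodes, or None."""
    locations = [
        node.source_location
        for node in nodes
        if getattr(node, "source_location", None) is not None
    ]
    if not locations:
        return None
    return SourceLocation(locations[0].start, locations[-1].end)
```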

compiler/front_end/lr1_test.py

Lines changed: 2 additions & 2 deletions
```diff
@@ -34,7 +34,7 @@ def _tokenize(text):
     result = []
     for i in range(len(text)):
         result.append(
-            Token(text[i], parser_types.make_location((1, i + 1), (1, i + 2)))
+            Token(text[i], parser_types.SourceLocation((1, i + 1), (1, i + 2)))
         )
     return result

@@ -209,7 +209,7 @@ def test_goto_table(self):

     def test_successful_parse(self):
         parser = _alsu_grammar.parser()
-        loc = parser_types.parse_location
+        loc = parser_types.SourceLocation.from_str
         s_to_c_c = parser_types.Production.parse("S -> C C")
         c_to_c_c = parser_types.Production.parse("C -> c C")
         c_to_d = parser_types.Production.parse("C -> d")
```
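As a usage note, the `from_str`/`__str__` pair replaces the old `parse_location()`/`format_location()` free functions and should round-trip. Continuing the illustrative sketch from the top of this page (the exact text format is an assumption):

```python
# Requires the SourcePosition/SourceLocation sketch defined near the top of this page.
loc = SourceLocation.from_str("1:5-1:9")  # previously parse_location("1:5-1:9")
assert str(loc) == "1:5-1:9"              # previously format_location(loc)
assert loc.start == SourcePosition(1, 5)
assert loc.end.column == 9
assert loc.is_synthetic is False
```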
