Skip to content

Commit 6a2a809

Browse files
committed
Initial optimization of get_declaration_of
(1) Cache values in the initial lookup, which at least prevents duplicate traversals. I think we can do even better here by not doing a linear search at all, but this improvement is dramatic enough in the `pure2-last-use.cpp2` test case that I'm going to check it in as at least an interim optimization. (2) Add `stable_vector`. Since `get_declaration_of` can be called before the `vector` symbol table is completely constructed, later pushes into the symbol table could invalidate previously cached iterators (so just doing (1) above by itself would crash). To avoid that problem, I've also added a new stable (and simplified) `stable_vector` that is intended for cppfront's grow-at-the-end uses. Its first use is in the symbol table, where, as an unintended side effect, it also seems to be faster than the `vector` for just the symbol table's container operations. I wanted to build something like the `stable_vector` type anyway because I plan to also use it (or something similar) as part of getting rid of all the `deque` objects. `deque` is a useful type conceptually, but sadly I didn't realize how terrible `deque` implementations currently often are, so I need to rip these out.
1 parent d09b052 commit 6a2a809

File tree

4 files changed

+212
-27
lines changed

4 files changed

+212
-27
lines changed

Diff for: source/common.h

+167-4
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
#include <compare>
3838
#include <cstdint>
3939
#include <iomanip>
40+
#include <iterator>
4041
#include <map>
4142
#include <string>
4243
#include <string_view>
@@ -966,6 +967,168 @@ struct error_entry
966967
};
967968

968969

970+
//-----------------------------------------------------------------------
971+
//
972+
// stable_vector: a simple segmented vector with limited interface
973+
// that doesn't invalidate by moving memory
974+
//
975+
//-----------------------------------------------------------------------
976+
//
977+
template <typename T>
978+
class stable_vector
979+
{
980+
static constexpr size_t PageSize = 3; // 1'000;
981+
982+
std::vector< std::vector<T> > data;
983+
984+
auto add_segment() -> void {
985+
data.push_back( {} );
986+
data.back().reserve(PageSize);
987+
}
988+
989+
public:
990+
stable_vector() {
991+
add_segment();
992+
}
993+
994+
auto empty() const -> bool {
995+
return data.size() == 1 && data.back().empty();
996+
}
997+
998+
auto size() const -> size_t {
999+
return (data.size() - 1) * PageSize + data.back().size();
1000+
}
1001+
1002+
auto ssize() const -> ptrdiff_t {
1003+
return unsafe_narrow<ptrdiff_t>(size());
1004+
}
1005+
1006+
auto operator[](size_t idx) -> T& {
1007+
testing.enforce(idx < size());
1008+
return data[idx / PageSize][idx % PageSize];
1009+
}
1010+
1011+
auto operator[](size_t idx) const -> T const& {
1012+
testing.enforce(idx < size());
1013+
return data[idx / PageSize][idx % PageSize];
1014+
}
1015+
1016+
auto back() -> T& {
1017+
return data.back().back();
1018+
}
1019+
1020+
auto push_back(T const& t) -> void {
1021+
if (data.back().size() == data.back().capacity()) {
1022+
add_segment();
1023+
}
1024+
data.back().push_back(t);
1025+
}
1026+
1027+
template< class... Args >
1028+
auto emplace_back( Args&&... args ) -> T& {
1029+
if (data.back().size() == data.back().capacity()) {
1030+
add_segment();
1031+
}
1032+
return data.back().emplace_back(CPP2_FORWARD(args)...);
1033+
}
1034+
1035+
auto pop_back() -> void {
1036+
data.back().pop_back();
1037+
if (data.back().size() == 0) {
1038+
data.pop_back();
1039+
}
1040+
}
1041+
1042+
//-------------------------------------------------------------------
1043+
// Debug interface
1044+
//
1045+
auto debug_print() -> void {
1046+
std::cout << "stable_vector:\n";
1047+
for (auto i = 0; auto& chunk : data) {
1048+
std::cout << " -- page " << i++ << " --\n ";
1049+
for (auto e : chunk) {
1050+
std::cout << e << ' ';
1051+
}
1052+
std::cout << "\n";
1053+
}
1054+
}
1055+
1056+
//-------------------------------------------------------------------
1057+
// Iterator interface
1058+
//
1059+
class iterator {
1060+
stable_vector* v;
1061+
size_t pos = 0;
1062+
public:
1063+
using value_type = T;
1064+
using difference_type = std::ptrdiff_t;
1065+
using pointer = T*;
1066+
using reference = T&;
1067+
using iterator_category = std::random_access_iterator_tag;
1068+
1069+
iterator( stable_vector* v_ = nullptr, size_t pos_ = 0) : v{v_}, pos{pos_} { }
1070+
auto operator++ () -> void { if (pos < v->size()) { ++pos; } }
1071+
auto operator-- () -> void { if (pos > 0 ) { --pos; } }
1072+
auto operator+= (size_t off) -> void { if (pos + off < v->size()) { pos += off; } else { pos = v->size(); } }
1073+
auto operator-= (size_t off) -> void { if (pos - off > 0 ) { pos -= off; } else { pos = 0; } }
1074+
auto operator* () -> T& { return (*v)[pos ]; }
1075+
auto operator-> () -> T* { return &(*v)[pos ]; }
1076+
auto operator[] (size_t off) -> T& { return (*v)[pos + off]; }
1077+
auto operator+ (size_t off) -> iterator { auto i = *this; i += off; return i; }
1078+
auto operator- (size_t off) -> iterator { auto i = *this; i -= off; return i; }
1079+
auto operator- (iterator const& that) -> ptrdiff_t { return pos - that.pos; }
1080+
auto operator<=>(iterator const&) const -> std::strong_ordering = default;
1081+
};
1082+
1083+
class const_iterator {
1084+
stable_vector const* v;
1085+
size_t pos = 0;
1086+
public:
1087+
using value_type = const T;
1088+
using difference_type = std::ptrdiff_t;
1089+
using pointer = T const*;
1090+
using reference = T const&;
1091+
using iterator_category = std::random_access_iterator_tag;
1092+
1093+
const_iterator( stable_vector const* v_ = nullptr, size_t pos_ = 0) : v{v_}, pos{pos_} { }
1094+
auto operator++ () -> void { if (pos < v->size()) { ++pos; } }
1095+
auto operator-- () -> void { if (pos > 0 ) { --pos; } }
1096+
auto operator+= (size_t off) -> void { if (pos + off < v->size()) { pos += off; } else { pos = v->size(); } }
1097+
auto operator-= (size_t off) -> void { if (pos - off > 0 ) { pos -= off; } else { pos = 0; } }
1098+
auto operator* () -> T const& { return (*v)[pos ]; }
1099+
auto operator-> () -> T const* { return &(*v)[pos ]; }
1100+
auto operator[] (size_t off) -> T const& { return (*v)[pos + off]; }
1101+
auto operator+ (size_t off) -> const_iterator { auto i = *this; i += off; return i; }
1102+
auto operator- (size_t off) -> const_iterator { auto i = *this; i -= off; return i; }
1103+
auto operator- (const_iterator const& that) -> ptrdiff_t { return pos - that.pos; }
1104+
auto operator<=>(const_iterator const&) const -> std::strong_ordering = default;
1105+
};
1106+
1107+
auto begin() -> iterator { return {this, 0 }; }
1108+
auto end () -> iterator { return {this, size()}; }
1109+
auto begin() const -> const_iterator { return {this, 0 }; }
1110+
auto end () const -> const_iterator { return {this, size()}; }
1111+
auto cbegin() const -> const_iterator { return {this, 0 }; }
1112+
auto cend () const -> const_iterator { return {this, size()}; }
1113+
};
1114+
1115+
template <typename T>
1116+
auto operator+ (size_t off, typename stable_vector<T>::iterator const& it) -> typename stable_vector<T>::iterator { auto i = it; i += off; return i; }
1117+
1118+
template <typename T>
1119+
auto operator+ (size_t off, typename stable_vector<T>::const_iterator const& it) -> typename stable_vector<T>::const_iterator { auto i = it; i += off; return i; }
1120+
1121+
// And now jump over to std:: to drop in the size/ssize overloads
1122+
}
1123+
namespace std {
1124+
template <typename T>
1125+
auto size(cpp2::stable_vector<T> const& v) -> ptrdiff_t { return v. size(); }
1126+
template <typename T>
1127+
auto ssize(cpp2::stable_vector<T> const& v) -> ptrdiff_t { return v.ssize(); }
1128+
}
1129+
namespace cpp2 {
1130+
1131+
9691132
//-----------------------------------------------------------------------
9701133
//
9711134
// Internal instrumentation
@@ -1088,7 +1251,7 @@ std::vector<stackinstr::entry> stackinstr::largest;
10881251
// Example: Today I wanted to measure how long get_declaration_of is
10891252
// taking. So add this line at the start of that function's body:
10901253
//
1091-
// auto guard = scope_timer("get_declaration_of");
1254+
// auto timer = scope_timer("get_declaration_of");
10921255
//
10931256
// Recompile cppfront, then run with -verbose:
10941257
//
@@ -1115,14 +1278,14 @@ std::vector<stackinstr::entry> stackinstr::largest;
11151278
// enclose that in { } with a scope_timer:
11161279
//
11171280
// {
1118-
// auto guard = scope_timer("get_declaration_of step 1, initial find loop");
1281+
// auto timer1 = scope_timer("get_declaration_of step 1, initial find loop");
11191282
// /* the code I want to measure */
11201283
// }
11211284
//
11221285
// - Immediately after that, install a second timer to measure the
11231286
// second loop which covers the entire rest of the function body:
11241287
//
1125-
// auto guard2 = scope_timer("get_declaration_of step 2, rest of lookup");
1288+
// auto timer2 = scope_timer("get_declaration_of step 2, rest of lookup");
11261289
// /* followed by the rest of the function's body */
11271290
//
11281291
// - And, since it's easy, throw in a third timer to measure one
@@ -1132,7 +1295,7 @@ std::vector<stackinstr::entry> stackinstr::largest;
11321295
// timer at those points in each loop iteration to measure just
11331296
// the sum of all those loop fragments):
11341297
//
1135-
// auto guard = scope_timer("get_declaration_of step 2b, 'move this' branch");
1298+
// auto timer2b = scope_timer("get_declaration_of step 2b, 'move this' branch");
11361299
//
11371300
// Recompile cppfront and run again with -verbose... sample output:
11381301
//

Diff for: source/lex.h

+6-6
Original file line numberDiff line numberDiff line change
@@ -342,11 +342,11 @@ auto labelized_position(token const* t)
342342
struct label {
343343
std::string text;
344344
label() {
345-
static auto ordinal = 0;
345+
static auto ordinal = 0; // TODO: static
346346
text = std::to_string(++ordinal);
347347
}
348348
};
349-
static auto labels = std::unordered_map<token const*, label const>{};
349+
static auto labels = std::unordered_map<token const*, label const>{}; // TODO: static
350350

351351
assert (t);
352352
return labels[t].text;
@@ -614,11 +614,11 @@ auto expand_raw_string_literal(
614614
// A stable place to store additional text for source tokens that are merged
615615
// into a whitespace-containing token (to merge the Cpp1 multi-token keywords)
616616
// -- this isn't about tokens generated later, that's tokens::generated_tokens
617-
static auto generated_text = std::deque<std::string>{};
618-
static auto generated_lines = std::deque<std::vector<source_line>>{};
617+
static auto generated_text = std::deque<std::string>{}; // TODO: static
618+
static auto generated_lines = std::deque<std::vector<source_line>>{}; // TODO: static
619619

620620

621-
static auto multiline_raw_strings = std::deque<multiline_raw_string>{};
621+
static auto multiline_raw_strings = std::deque<multiline_raw_string>{}; // TODO: static
622622

623623
auto lex_line(
624624
std::string& mutable_line,
@@ -2070,7 +2070,7 @@ class tokens
20702070

20712071
};
20722072

2073-
static auto generated_lexers = std::deque<tokens>{};
2073+
static auto generated_lexers = std::deque<tokens>{}; // TODO: static
20742074

20752075
}
20762076

Diff for: source/parse.h

+2-2
Original file line numberDiff line numberDiff line change
@@ -506,7 +506,7 @@ struct expression_statement_node;
506506

507507
struct expression_node
508508
{
509-
static inline std::vector<expression_node*> current_expressions = {};
509+
static inline std::vector<expression_node*> current_expressions = {}; // TODO: static ?
510510

511511
std::unique_ptr<assignment_expression_node> expr;
512512
int num_subexpressions = 0;
@@ -759,7 +759,7 @@ auto primary_expression_node::get_literal() const
759759

760760
struct expression_statement_node
761761
{
762-
static inline std::vector<expression_statement_node*> current_expression_statements = {};
762+
static inline std::vector<expression_statement_node*> current_expression_statements = {}; // TODO: static ?
763763

764764
std::unique_ptr<expression_node> expr;
765765
bool has_semicolon = false;

Diff for: source/sema.h

+37-15
Original file line numberDiff line numberDiff line change
@@ -357,10 +357,12 @@ class sema
357357
{
358358
public:
359359
std::vector<error_entry>& errors;
360-
std::vector<symbol> symbols;
360+
stable_vector<symbol> symbols;
361361

362362
std::vector<selection_statement_node const*> active_selections;
363363

364+
mutable std::unordered_map< token const*, cpp2::stable_vector<symbol>::const_iterator > token_to_symbol_iterator;
365+
364366
public:
365367
//-----------------------------------------------------------------------
366368
// Constructor
@@ -397,34 +399,54 @@ class sema
397399
) const
398400
-> declaration_sym const*
399401
{
402+
auto timer = scope_timer("get_declaration_of");
403+
400404
// First find the position the query is coming from
401405
// and remember its depth
402406
auto i = symbols.cbegin();
403-
while (
404-
i != symbols.cend()
405-
&& i->get_global_token_order() < t.get_global_token_order()
407+
408+
//{
409+
//auto timer1 = scope_timer("get_declaration_of step 1, initial find loop");
410+
411+
// Check the cache first to avoid repeated computations
412+
if (auto it = token_to_symbol_iterator.find(&t);
413+
it != token_to_symbol_iterator.end()
406414
)
407415
{
408-
++i;
416+
i = it->second;
409417
}
410-
411-
while (
412-
i == symbols.cend()
413-
|| !i->start
414-
)
418+
else
415419
{
416-
if (i == symbols.cbegin()) {
417-
return nullptr;
420+
while (
421+
i != symbols.cend()
422+
&& i->get_global_token_order() < t.get_global_token_order()
423+
)
424+
{
425+
++i;
418426
}
419-
--i;
427+
428+
while (
429+
i == symbols.cend()
430+
|| !i->start
431+
)
432+
{
433+
if (i == symbols.cbegin()) {
434+
return nullptr;
435+
}
436+
--i;
437+
}
438+
token_to_symbol_iterator[&t] = i;
420439
}
440+
//}
421441

422442
auto depth = i->depth;
423443

444+
//auto timer2 = scope_timer("get_declaration_of step 2, rest of lookup");
445+
424446
// Then look backward to find the first declaration of
425447
// this name that is not deeper (in a nested scope)
426448
// and is in the same function
427-
using I = std::vector<symbol>::const_iterator;
449+
using I = cpp2::stable_vector<symbol>::const_iterator;
428450
auto advance = [](I& i, int n, I bound) { // TODO Use `std::ranges::advance`
429451
auto in = i;
430452
if (std::abs(n) >= std::abs(bound - i)) {
@@ -2424,7 +2446,7 @@ class sema
24242446

24252447
// By giving tokens an order during sema
24262448
// generated code can be equally checked
2427-
static index_t global_token_counter = 1;
2449+
static index_t global_token_counter = 1; // TODO static
24282450
t.set_global_token_order( global_token_counter++ );
24292451

24302452
auto started_member_access =

0 commit comments

Comments
 (0)