
Commit 8cb6c3c

Shared memory table scan (phase 2) (#1403)
* Insert new locators into type index
* Remove obsolete locators from type index
* Clean up comments
* cleanup
* rename method
* remove allocate_type()
* Address review feedback
1 parent b7424f3 commit 8cb6c3c

8 files changed: +138 / -77 lines changed

production/db/core/inc/db_helpers.hpp

Lines changed: 9 additions & 8 deletions
@@ -32,19 +32,13 @@ inline common::gaia_id_t allocate_id()
     return ++(counters->last_id);
 }
 
-inline common::gaia_type_t allocate_type()
-{
-    counters_t* counters = gaia::db::get_counters();
-    return ++(counters->last_type_id);
-}
-
 inline gaia_txn_id_t allocate_txn_id()
 {
     counters_t* counters = gaia::db::get_counters();
     return ++(counters->last_txn_id);
 }
 
-inline gaia_locator_t allocate_locator()
+inline gaia_locator_t allocate_locator(common::gaia_type_t type)
 {
     counters_t* counters = gaia::db::get_counters();
 
@@ -53,7 +47,14 @@ inline gaia_locator_t allocate_locator()
         throw system_object_limit_exceeded_internal();
     }
 
-    return ++(counters->last_locator);
+    gaia_locator_t locator = ++(counters->last_locator);
+
+    type_index_t* type_index = get_type_index();
+    // Ignore failure if type is already registered.
+    type_index->register_type(type);
+    type_index->add_locator(type, locator);
+
+    return locator;
 }
 
 inline void update_locator(gaia_locator_t locator, gaia_offset_t offset)
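
The new allocate_locator() ties locator allocation to type-index maintenance, which is what enables a per-type table scan over shared memory. As an illustration only, here is a toy, single-threaded model of the data layout being built (illustrative names and STL containers rather than the Gaia types, with the fixed shared-memory arrays, limits, and all lock-free synchronization omitted): a per-type list head plus a next-link array indexed by locator, so scanning one type touches only that type's locators.

// Toy, single-threaded model of the structure allocate_locator() now maintains.
#include <cstdint>
#include <cstdio>
#include <unordered_map>
#include <vector>

constexpr uint32_t c_invalid_locator = 0;

struct toy_type_index_t
{
    // type -> first locator in that type's list.
    std::unordered_map<uint32_t, uint32_t> first_locator_for_type;
    // locator -> next locator of the same type (index = locator; slot 0 unused).
    std::vector<uint32_t> next_locator = std::vector<uint32_t>(1, c_invalid_locator);

    uint32_t last_locator = c_invalid_locator;

    uint32_t allocate_locator(uint32_t type)
    {
        uint32_t locator = ++last_locator;
        // Link the new locator in at the head of its type's list.
        next_locator.push_back(first_locator_for_type[type]);
        first_locator_for_type[type] = locator;
        return locator;
    }

    // A "table scan": visit every locator of one type without touching
    // locators of other types.
    void scan(uint32_t type) const
    {
        auto it = first_locator_for_type.find(type);
        uint32_t locator = (it == first_locator_for_type.end()) ? c_invalid_locator : it->second;
        for (; locator != c_invalid_locator; locator = next_locator[locator])
        {
            std::printf("type %u -> locator %u\n", type, locator);
        }
    }
};

int main()
{
    toy_type_index_t index;
    index.allocate_locator(1);
    index.allocate_locator(2);
    index.allocate_locator(1);
    index.scan(1); // Prints locators 3 and 1 (newest first).
}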

production/db/core/inc/db_object_helpers.hpp

Lines changed: 2 additions & 2 deletions
@@ -32,7 +32,7 @@ inline db_object_t* create_object(
     size_t ref_len = refs_count * sizeof(*refs);
     size_t total_len = obj_data_size + ref_len;
     gaia::db::hash_node_t* hash_node = db_hash_map::insert(id);
-    hash_node->locator = allocate_locator();
+    hash_node->locator = allocate_locator(type);
     gaia::db::allocate_object(hash_node->locator.load(), total_len);
     db_object_t* obj_ptr = locator_to_ptr(hash_node->locator.load());
     obj_ptr->id = id;
@@ -56,7 +56,7 @@ inline db_object_t* create_object(
     const void* obj_data)
 {
     gaia::db::hash_node_t* hash_node = db_hash_map::insert(id);
-    hash_node->locator = allocate_locator();
+    hash_node->locator = allocate_locator(type);
     gaia::db::allocate_object(hash_node->locator.load(), obj_data_size);
     db_object_t* obj_ptr = locator_to_ptr(hash_node->locator.load());
     obj_ptr->id = id;

production/db/core/inc/type_index.hpp

Lines changed: 26 additions & 23 deletions
@@ -68,61 +68,64 @@ static_assert(
 static_assert(std::atomic<locator_list_node_t>::is_always_lock_free);
 
 // Holds a type ID and the head of its linked list of locators.
+// This structure is always updated atomically via CAS.
 struct type_index_entry_t
 {
     // The type ID of this locator list.
-    std::atomic<common::gaia_type_t::value_type> type;
+    common::gaia_type_t::value_type type;
 
     // The first locator in the list.
-    std::atomic<gaia_locator_t::value_type> first_locator;
+    gaia_locator_t::value_type first_locator;
 };
 
-static_assert(sizeof(type_index_entry_t) == 8, "Expected sizeof(type_index_entry_t) to be 8!");
+static_assert(
+    sizeof(type_index_entry_t) == sizeof(uint64_t),
+    "Expected type_index_entry_t to occupy 8 bytes!");
+static_assert(std::atomic<type_index_entry_t>::is_always_lock_free);
 
-// An index enabling efficient retrieval of all locators belonging to a
-// registered type. Each type ID maps to a linked list containing all locators
-// of that type.
+// An index enabling efficient retrieval of all locators of a registered type.
+// Each type ID maps to a linked list containing all locators of that type.
 struct type_index_t
 {
     // Mapping of registered type IDs to the heads of their locator lists.
-    type_index_entry_t type_index_entries[c_max_types];
-
-    // Atomic counter incremented during type registration to determine the
-    // index of the registered type in the `type_index_entries` array.
-    std::atomic<size_t> type_index_entries_count;
+    std::atomic<type_index_entry_t> type_index_entries[c_max_types];
 
     // Pool of nodes used for the linked lists representing the sets of locators
     // belonging to each registered type. The array index of each node
     // corresponds to the locator it represents.
     locator_list_node_t locator_lists_array[c_max_locators + 1];
 
+    // Returns a reference to the type index entry containing `type`.
+    // PRECONDITION: `type` has already been registered.
+    inline std::atomic<type_index_entry_t>& get_type_index_entry(common::gaia_type_t type);
+
     // Claims a slot in `type_index_entries` by atomically incrementing
     // `type_index_entries_count` (slots are not reused).
-    inline void register_type(common::gaia_type_t type);
+    // Returns true if `type` was not already registered, false otherwise.
+    inline bool register_type(common::gaia_type_t type);
 
-    // Returns the head of the locator list for the given type.
+    // Returns the head of the locator list for `type`.
    inline gaia_locator_t get_first_locator(common::gaia_type_t type);
 
-    // Changes the head of the locator list for the given type to
-    // `desired_locator`, if the head is still `expected_locator`.
-    // Returns true if the head is still `expected_locator`, false otherwise.
-    // This has CAS semantics because we need to retry if the head of the list
-    // changes during the operation.
-    inline bool set_first_locator(
+    // Changes the head of the locator list for `type` to `desired_locator`, if
+    // the head is still `expected_locator`. Returns true if the head is still
+    // `expected_locator`, false otherwise. (This has CAS semantics because we
+    // need to retry if the head of the list changes during the operation.)
+    inline bool try_set_first_locator(
         common::gaia_type_t type, gaia_locator_t expected_locator, gaia_locator_t desired_locator);
 
-    // Gets the list node corresponding to the given locator.
+    // Gets the list node corresponding to `locator`.
     inline locator_list_node_t* get_list_node(gaia_locator_t locator);
 
-    // Inserts the node for a locator at the head of the list for its type.
+    // Inserts the node for `locator` at the head of the list for `type`.
     // PRECONDITION: `type` is already registered in `type_index_entries`.
     // PRECONDITION: The list node for `locator` has not been previously used.
     // POSTCONDITION: `type_index_cursor_t(type).current_locator()` returns
     // `locator` (in the absence of concurrent invocations).
     inline void add_locator(common::gaia_type_t type, gaia_locator_t locator);
 
-    // Logically deletes the given locator from the list for its type.
-    // Returns false if the given locator was already logically deleted, true otherwise.
+    // Logically deletes `locator` from the list for its type.
+    // Returns false if `locator` was already logically deleted, true otherwise.
     // PRECONDITION: `locator` was previously allocated.
     // POSTCONDITION: returns false if list node for `locator` was already marked for deletion.
     inline bool delete_locator(gaia_locator_t locator);
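
The pair of static_asserts added above is what lets a whole list head (type plus first locator) be loaded and CAS-updated as a single 8-byte unit. A minimal standalone sketch of that pattern follows; the 32-bit field widths are an assumption chosen so the struct packs into 8 bytes, since the real value_type definitions are outside this diff.

// Minimal standalone sketch of the whole-entry CAS pattern the new
// type_index_entry_t relies on. Field widths are assumed, not the Gaia ones.
#include <atomic>
#include <cstdint>

struct entry_t
{
    uint32_t type;
    uint32_t first_locator;
};

// Mirrors the static_asserts in the diff above.
static_assert(sizeof(entry_t) == sizeof(uint64_t));
static_assert(std::atomic<entry_t>::is_always_lock_free);

int main()
{
    std::atomic<entry_t> entry{entry_t{0, 0}};

    // "register_type": claim an uninitialized slot for type 42.
    entry_t expected{0, 0};
    bool registered = entry.compare_exchange_strong(expected, entry_t{42, 0});

    // "try_set_first_locator": swing the list head for type 42 from an empty
    // list to locator 7, failing if another thread changed it first.
    expected = entry_t{42, 0};
    bool head_updated = entry.compare_exchange_strong(expected, entry_t{42, 7});

    return (registered && head_updated) ? 0 : 1;
}

Because compare_exchange_strong on a std::atomic<T> compares object representations, keeping the struct free of padding bytes is what makes these whole-struct CAS updates behave predictably.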

production/db/core/inc/type_index.inc

Lines changed: 73 additions & 24 deletions
@@ -36,6 +36,7 @@ bool locator_list_node_t::mark_for_deletion()
     {
         return false;
     }
+
     uint64_t desired_word{data_word | c_deleted_flag_mask};
     if (data_word.compare_exchange_strong(expected_word, desired_word))
     {
@@ -46,45 +47,93 @@ bool locator_list_node_t::mark_for_deletion()
         return true;
     }
 
-void type_index_t::register_type(common::gaia_type_t type)
+std::atomic<type_index_entry_t>& type_index_t::get_type_index_entry(common::gaia_type_t type)
 {
-    if (type_index_entries_count >= c_max_types)
-    {
-        throw type_limit_exceeded_internal();
-    }
-    type_index_entries[type_index_entries_count++].type = type;
-}
+    ASSERT_PRECONDITION(type.is_valid(), "Cannot call get_type_index_entry() with an invalid type!");
 
-gaia_locator_t type_index_t::get_first_locator(common::gaia_type_t type)
-{
     // REVIEW: With our current limit of 64 types, linear search should be
     // fine (the whole array is at most 8 cache lines, so should almost
     // always be in L1 cache), but with more types we'll eventually need
-    // sublinear search complexity.
-    for (size_t i = 0; i < type_index_entries_count; ++i)
+    // sublinear search complexity (e.g., a hash table).
+
+    // Scan until the end of the array. (We could stop at the first
+    // uninitialized entry, but the branch is likely not worth it for such a
+    // small array; see e.g.
+    // https://dirtyhandscoding.wordpress.com/2017/08/25/performance-comparison-linear-search-vs-binary-search/.)
+    for (size_t i = 0; i < std::size(type_index_entries); ++i)
     {
-        if (type_index_entries[i].type == type)
+        auto& entry_ref = type_index_entries[i];
+        auto entry_val = entry_ref.load();
+        if (entry_val.type == type)
         {
-            return gaia_locator_t(type_index_entries[i].first_locator);
+            return entry_ref;
         }
     }
-    ASSERT_UNREACHABLE("Type must be registered before accessing its locator list!");
+    // If we reach the end of the array without finding the entry for this type,
+    // the precondition has been violated.
+    ASSERT_UNREACHABLE("Type must be registered before calling get_type_index_entry()!");
 }
 
-bool type_index_t::set_first_locator(
-    common::gaia_type_t type, gaia_locator_t expected_locator, gaia_locator_t desired_locator)
+bool type_index_t::register_type(common::gaia_type_t type)
 {
-    gaia_locator_t::value_type expected_value = expected_locator.value();
-    gaia_locator_t::value_type desired_value = desired_locator.value();
-
-    for (size_t i = 0; i < type_index_entries_count; ++i)
+    ASSERT_PRECONDITION(type.is_valid(), "Cannot call register_type() with an invalid type!");
+
+    // This implements the insert operation on a lock-free set. Inserting a
+    // duplicate element is prevented by CAS semantics: each concurrent insert
+    // uses the next uninitialized array entry (there can be no "holes" in the
+    // array because entries only go from zero to nonzero, and we never scan
+    // past an entry initially read as zero until a CAS shows it is nonzero), so
+    // for any two concurrent inserts, one of them (the one that initializes the
+    // higher-indexed entry) must see the other's insert, and abort if it has
+    // the same value.
+    //
+    // Scan until the first uninitialized entry or the end of the array,
+    // whichever comes first.
+    for (size_t i = 0; i < std::size(type_index_entries); ++i)
     {
-        if (type_index_entries[i].type == type)
+        auto& entry_ref = type_index_entries[i];
+        auto entry_val = entry_ref.load();
+        // The type was already registered.
+        if (entry_val.type == type)
         {
-            return type_index_entries[i].first_locator.compare_exchange_strong(expected_value, desired_value);
+            return false;
+        }
+
+        // Try to initialize the first uninitialized entry.
+        //
+        // REVIEW: This could technically be a relaxed load, because the
+        // subsequent CAS will detect a stale read. However, we don't currently
+        // specify non-default memory orderings anywhere, and I think we should
+        // only change this policy on the basis of profiling data.
+        if (entry_val.type == common::c_invalid_gaia_type)
+        {
+            type_index_entry_t expected_entry{common::c_invalid_gaia_type, c_invalid_gaia_locator};
+            type_index_entry_t desired_entry{type, c_invalid_gaia_locator};
+
+            // If the CAS succeeds, we are done, otherwise try the next entry.
+            if (entry_ref.compare_exchange_strong(expected_entry, desired_entry))
+            {
+                return true;
+            }
         }
     }
-    ASSERT_UNREACHABLE("Type must be registered before accessing its locator list!");
+
+    // We reached the end of the array without finding an uninitialized entry.
+    throw type_limit_exceeded_internal();
+}
+
+gaia_locator_t type_index_t::get_first_locator(common::gaia_type_t type)
+{
+    return get_type_index_entry(type).load().first_locator;
+}
+
+bool type_index_t::try_set_first_locator(
+    common::gaia_type_t type, gaia_locator_t expected_locator, gaia_locator_t desired_locator)
+{
+    type_index_entry_t expected_entry{type, expected_locator};
+    type_index_entry_t desired_entry{type, desired_locator};
+
+    return get_type_index_entry(type).compare_exchange_strong(expected_entry, desired_entry);
 }
 
 locator_list_node_t* type_index_t::get_list_node(gaia_locator_t locator)
@@ -127,7 +176,7 @@ void type_index_t::add_locator(common::gaia_type_t type, gaia_locator_t locator)
 
         // Now try to point the list head to the new node, retrying if it
        // was concurrently pointed to another node.
-        if (set_first_locator(type, first_locator, locator))
+        if (try_set_first_locator(type, first_locator, locator))
         {
            break;
        }
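
register_type() above is an insert into a lock-free set backed by a fixed-size array of atomics: entries only ever go from zero to nonzero, so a linear scan plus one CAS per candidate slot suffices. Below is a standalone sketch of the same discipline with plain integers (illustrative size and element type, not the Gaia constants); for simplicity it re-reads a slot after a failed CAS before moving on.

// Standalone sketch of an array-based lock-free set insert.
// Zero marks an uninitialized slot, so inserted values must be nonzero.
#include <array>
#include <atomic>
#include <cstddef>
#include <cstdint>
#include <stdexcept>

constexpr std::size_t c_max_entries = 64;
std::array<std::atomic<uint64_t>, c_max_entries> g_entries{};

// Returns true if `value` was inserted, false if it was already present.
bool insert(uint64_t value)
{
    for (std::size_t i = 0; i < g_entries.size(); ++i)
    {
        uint64_t current = g_entries[i].load();
        while (true)
        {
            if (current == value)
            {
                // Already present (possibly inserted concurrently).
                return false;
            }
            if (current != 0)
            {
                // Slot taken by a different value; try the next slot.
                break;
            }
            // Slot looks free: try to claim it. On failure, `current` is
            // refreshed with the winning value and re-checked.
            if (g_entries[i].compare_exchange_strong(current, value))
            {
                return true;
            }
        }
    }
    throw std::runtime_error("set is full");
}

In this sketch a duplicate is either seen during the scan or causes the CAS to fail, in which case the refreshed slot value is checked again before the scan advances.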

production/db/core/inc/type_index_cursor.inc

Lines changed: 1 addition & 1 deletion
@@ -68,7 +68,7 @@ bool type_index_cursor_t::unlink_for_deletion()
     // Handle the special case where the first marked node is at the head of the list.
     if (!prev_node)
     {
-        return m_type_index->set_first_locator(m_type, unlinked_locator, current_locator());
+        return m_type_index->try_set_first_locator(m_type, unlinked_locator, current_locator());
     }
 
     // Otherwise, set the previous node to point to the new current locator
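
The renamed try_set_first_locator() is used here for the head-of-list case of physical unlinking: a node already marked for deletion is removed by swinging the list head from it to its successor with a single CAS, and the caller retries if the head changed in the meantime. A minimal standalone sketch with integer locators in place of the Gaia types:

// Standalone sketch of the head-unlink step (0 meaning "no locator").
#include <atomic>
#include <cstdint>

// Returns true if this call swung the head from the deleted node to its
// successor; false means another thread changed the head first, and the
// caller should re-read the list and retry.
inline bool unlink_head(std::atomic<uint32_t>& head, uint32_t deleted_locator, uint32_t successor_locator)
{
    uint32_t expected = deleted_locator;
    return head.compare_exchange_strong(expected, successor_locator);
}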

production/db/core/src/db_server.cpp

Lines changed: 23 additions & 12 deletions
@@ -2006,22 +2006,22 @@ void server_t::gc_txn_log_from_offset(log_offset_t log_offset, bool is_committed
 {
     txn_log_t* txn_log = get_txn_log_from_offset(log_offset);
 
+    // Remove index entries that might be referencing obsolete versions before
+    // actually deallocating them.
+    bool deallocate_new_offsets = !is_committed;
+    index::index_builder_t::gc_indexes_from_txn_log(txn_log, deallocate_new_offsets);
+
     // If the txn committed, we deallocate only undo versions, because the
     // redo versions may still be visible after the txn has fallen
     // behind the watermark. If the txn aborted, then we deallocate only
     // redo versions, because the undo versions may still be visible. Note
     // that we could deallocate intermediate versions (i.e., those
     // superseded within the same txn) immediately, but we do it here
     // for simplicity.
-    bool deallocate_new_offsets = !is_committed;
-
-    // Remove index entries that might be referencing obsolete versions before
-    // actually deallocating them.
-    index::index_builder_t::gc_indexes_from_txn_log(txn_log, deallocate_new_offsets);
-    deallocate_txn_log(txn_log, deallocate_new_offsets);
+    deallocate_txn_log(txn_log, is_committed);
 }
 
-void server_t::deallocate_txn_log(txn_log_t* txn_log, bool deallocate_new_offsets)
+void server_t::deallocate_txn_log(txn_log_t* txn_log, bool is_committed)
 {
     ASSERT_PRECONDITION(txn_log, "txn_log must be a valid address!");
     ASSERT_PRECONDITION(
@@ -2043,17 +2043,16 @@ void server_t::deallocate_txn_log(txn_log_t* txn_log, bool deallocate_new_offset
         // txn log of an aborted txn is after it falls behind the watermark,
         // because at that point it cannot be in the conflict window of any
         // committing txn.
-        gaia_offset_t offset_to_free = deallocate_new_offsets
-            ? log_record->new_offset
-            : log_record->old_offset;
+        gaia_offset_t offset_to_free = is_committed
+            ? log_record->old_offset
+            : log_record->new_offset;
 
         // If we're gc-ing the old version of an object that is being deleted,
         // then request the deletion of its locator from the corresponding record list.
-        if (!deallocate_new_offsets && !log_record->new_offset.is_valid())
+        if (is_committed && log_record->operation() == gaia_operation_t::remove)
         {
             // Get the old object data to extract its type.
             db_object_t* db_object = offset_to_ptr(log_record->old_offset);
-
             // Retrieve the record_list_t instance corresponding to the type.
             std::shared_ptr<record_list_t> record_list = record_list_manager_t::get()->get_record_list(db_object->type);
 
@@ -2065,6 +2064,18 @@ void server_t::deallocate_txn_log(txn_log_t* txn_log, bool deallocate_new_offset
         {
             deallocate_object(offset_to_free);
         }
+
+        // For committed txns, we need to remove any deleted locators from the
+        // type index. For aborted or rolled-back txns, we need to remove any
+        // allocated locators from the type index.
+        bool is_locator_removal_committed = is_committed && log_record->operation() == gaia_operation_t::remove;
+        bool is_locator_creation_aborted = !is_committed && log_record->operation() == gaia_operation_t::create;
+        if (is_locator_removal_committed || is_locator_creation_aborted)
+        {
+            type_index_t* type_index = get_type_index();
+            bool has_succeeded = type_index->delete_locator(log_record->locator);
+            ASSERT_INVARIANT(has_succeeded, "A locator cannot be deleted twice!");
+        }
     }
 
     // We've deallocated all garbage versions, and we have no shared references,
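
The GC path now branches on is_committed together with the log record's operation. The standalone helper below restates that decision table with illustrative types (not the server's gaia_operation_t or txn log record layout): a committed txn frees old versions and drops the locators of removed objects from the type index, while an aborted txn frees new versions and drops the locators of created objects.

// Standalone restatement of the GC decision table above. The enum and the
// struct are simplified stand-ins; offset validity checks are ignored here.
#include <cstdint>

enum class operation_t : uint8_t
{
    create,
    update,
    remove,
};

struct gc_decision_t
{
    bool free_old_offset; // Undo version is garbage (committed txn).
    bool free_new_offset; // Redo version is garbage (aborted txn).
    bool delete_locator;  // Locator must leave the type index.
};

constexpr gc_decision_t decide_gc(bool is_committed, operation_t op)
{
    return gc_decision_t{
        // A committed txn frees the superseded (old) version...
        is_committed,
        // ...while an aborted txn frees the never-visible (new) version.
        !is_committed,
        // The locator leaves the type index when a committed txn removed the
        // object, or when an aborted txn had created it.
        (is_committed && op == operation_t::remove)
            || (!is_committed && op == operation_t::create)};
}

static_assert(decide_gc(true, operation_t::remove).delete_locator);
static_assert(!decide_gc(true, operation_t::update).delete_locator);
static_assert(decide_gc(false, operation_t::create).delete_locator);
static_assert(!decide_gc(false, operation_t::remove).delete_locator);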

production/db/core/src/gaia_ptr_client.cpp

Lines changed: 1 addition & 1 deletion
@@ -121,7 +121,7 @@ gaia_ptr_t gaia_ptr_t::create_no_txn(gaia_id_t id, gaia_type_t type, reference_o
     // TODO: this constructor allows creating a gaia_ptr_t in an invalid state;
     // the db_object_t should either be initialized before and passed in
     // or it should be initialized inside the constructor.
-    gaia_locator_t locator = allocate_locator();
+    gaia_locator_t locator = allocate_locator(type);
     hash_node_t* hash_node = db_hash_map::insert(id);
     hash_node->locator = locator;
     allocate_object(locator, total_payload_size);
