-
Notifications
You must be signed in to change notification settings - Fork 6
Custom recovery/checkpointing impl #1232
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: logwriter_thread
Are you sure you want to change the base?
Changes from all commits
6fd85c2
59b5586
aee42d2
98615c6
6a002d6
a0712f4
9913c26
3cb4d95
35fe56f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -8,6 +8,7 @@ | |
| #include <cstddef> | ||
|
|
||
| #include <atomic> | ||
| #include <limits> | ||
| #include <ostream> | ||
|
|
||
| #include "gaia/common.hpp" | ||
|
|
@@ -23,11 +24,30 @@ namespace db | |
| namespace persistence | ||
| { | ||
|
|
||
| enum class log_reader_mode_t : uint8_t | ||
| { | ||
| not_set = 0x0, | ||
|
|
||
| // Checkpoint mode. | ||
| // Does not tolerate any IO failure when reading a log file; any | ||
| // IO error is treated as unrecoverable. | ||
| checkpoint_fail_on_first_error = 0x1, | ||
|
|
||
| // Recovery mode. | ||
| // Stop recovery on first IO error. Database will always start and will try to recover as much | ||
| // committed data from the log as possible. | ||
| // Updates are logged one batch at a time; persistent batch IO is validated | ||
| // first before marking any txn in the batch as durable (and returning a commit decision to the user). | ||
| // Thus, ignore any txn after the last seen decision timestamp before encountering an IO error. | ||
| recovery_stop_on_first_error = 0x2, | ||
| }; | ||
|
|
||
| enum class record_type_t : uint8_t | ||
| { | ||
| not_set = 0x0, | ||
| txn = 0x1, | ||
| decision = 0x2, | ||
| end_of_file = 0x3, | ||
| }; | ||
|
|
||
| enum class decision_type_t : uint8_t | ||
|
|
@@ -46,26 +66,27 @@ struct decision_entry_t | |
| // Pair of log file sequence number and file fd. | ||
| typedef std::vector<decision_entry_t> decision_list_t; | ||
|
|
||
| class file_sequence_t : public common::int_type_t<size_t, 0> | ||
| // Persistent log file sequence number. | ||
| class log_sequence_t : public common::int_type_t<size_t, 0> | ||
| { | ||
| public: | ||
| // By default, we should initialize to an invalid value. | ||
| constexpr file_sequence_t() | ||
| constexpr log_sequence_t() | ||
| : common::int_type_t<size_t, 0>() | ||
| { | ||
| } | ||
|
|
||
| constexpr file_sequence_t(size_t value) | ||
| constexpr log_sequence_t(size_t value) | ||
| : common::int_type_t<size_t, 0>(value) | ||
| { | ||
| } | ||
| }; | ||
|
|
||
| static_assert( | ||
| sizeof(file_sequence_t) == sizeof(file_sequence_t::value_type), | ||
| "file_sequence_t has a different size than its underlying integer type!"); | ||
| sizeof(log_sequence_t) == sizeof(log_sequence_t::value_type), | ||
| "log_sequence_t has a different size than its underlying integer type!"); | ||
|
|
||
| constexpr file_sequence_t c_invalid_file_sequence_number; | ||
| constexpr log_sequence_t c_invalid_log_sequence_number; | ||
|
|
||
| typedef size_t file_offset_t; | ||
|
|
||
|
|
@@ -81,19 +102,25 @@ typedef uint32_t crc32_t; | |
| // This assertion ensures that the default type initialization | ||
| // matches the value of the invalid constant. | ||
| static_assert( | ||
| c_invalid_file_sequence_number.value() == file_sequence_t::c_default_invalid_value, | ||
| "Invalid c_invalid_file_sequence_number initialization!"); | ||
| c_invalid_log_sequence_number.value() == log_sequence_t::c_default_invalid_value, | ||
| "Invalid c_invalid_log_sequence_number initialization!"); | ||
|
|
||
| struct log_file_info_t | ||
| { | ||
| file_sequence_t sequence; | ||
| log_sequence_t sequence; | ||
| int file_fd; | ||
| }; | ||
|
|
||
| struct log_file_pointer_t | ||
| { | ||
| void* begin; | ||
| size_t size; | ||
| }; | ||
|
|
||
| struct record_header_t | ||
| { | ||
| record_size_t record_size; | ||
| crc32_t crc; | ||
| record_size_t payload_size; | ||
| record_type_t record_type; | ||
| gaia_txn_id_t txn_commit_ts; | ||
|
|
||
|
|
@@ -109,8 +136,86 @@ struct record_header_t | |
| char padding[3]; | ||
| }; | ||
|
|
||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Re: the |
||
| // The primary motivation of this buffer is to keep a hold of any additional information we want to write to the log | ||
| // apart from the shared memory objects. | ||
| constexpr size_t c_invalid_read_record_size = 0; | ||
|
|
||
| struct read_record_t | ||
| { | ||
| struct record_header_t header; | ||
| unsigned char payload[]; | ||
|
|
||
| // Record size includes the header size and the payload size. | ||
| size_t get_record_size() | ||
| { | ||
| return header.record_size; | ||
| } | ||
|
|
||
| unsigned char* get_record() | ||
| { | ||
| return reinterpret_cast<unsigned char*>(this); | ||
| } | ||
|
|
||
| unsigned char* get_deleted_ids() | ||
| { | ||
| ASSERT_PRECONDITION(header.record_type == record_header_t::record_type::txn, "Incorrect record type!"); | ||
| return reinterpret_cast<unsigned char*>(payload); | ||
| } | ||
|
|
||
| unsigned char* get_objects() | ||
| { | ||
| ASSERT_PRECONDITION(header.record_type == record_header_t::record_type::txn, "Incorrect record type!"); | ||
| return get_deleted_ids() + header.deleted_object_count * sizeof(gaia_id_t); | ||
| } | ||
|
|
||
| decision_entry_t* get_decisions() | ||
| { | ||
| ASSERT_PRECONDITION(header.record_type == record_header_t::record_type::decision, "Incorrect record type!"); | ||
| return reinterpret_cast<decision_entry_t*>(payload); | ||
| } | ||
|
|
||
| unsigned char* get_payload_end() | ||
| { | ||
| return get_record() + get_record_size(); | ||
| } | ||
|
|
||
| static read_record_t* get_record(void* ptr) | ||
| { | ||
| ASSERT_PRECONDITION(ptr, "Invalid address!"); | ||
| return reinterpret_cast<read_record_t*>(ptr); | ||
| } | ||
|
|
||
| bool is_valid() | ||
| { | ||
| return header.record_size != c_invalid_read_record_size && (header.record_type == record_type_t::txn || header.record_type == record_type_t::decision || header.record_type == record_type_t::end_of_file); | ||
| } | ||
| }; | ||
|
|
||
| struct record_iterator_t | ||
| { | ||
| // Pointer to the current record in a log file. | ||
| unsigned char* iterator; | ||
|
|
||
| // Beginning of the log file. | ||
| unsigned char* begin; | ||
|
|
||
| // End of log file. | ||
| unsigned char* end; | ||
|
|
||
| // End of log file. May not be the same as end. | ||
| unsigned char* stop_at; | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What does this mean? |
||
|
|
||
| // Value returned from the mmap() call on a persistent log file. | ||
| void* mapped_data; | ||
| size_t map_size; | ||
| int file_fd; | ||
|
|
||
| // Recovery mode. | ||
| log_reader_mode_t recovery_mode; | ||
|
|
||
| // This flag is set when recovery is halted. | ||
| bool halt_recovery; | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Halt recovery on what condition? Or is this just a flag we set when we want to halt recovery? |
||
| }; | ||
|
|
||
| // This buffer is used to stage non-object data to be written to the log. | ||
| // Custom information includes | ||
| // 1) deleted object IDs in a txn. | ||
| // 2) custom txn headers | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I guess we should move these comments to the header file, but you can wait until we move the others as well.