From ef11349e22758828084a110ee78851ba5198e9d4 Mon Sep 17 00:00:00 2001 From: ninsmiracle Date: Tue, 9 Jul 2024 17:31:41 +0800 Subject: [PATCH 1/6] fix: deal with plog concurrent problem --- .../duplication/load_from_private_log.cpp | 19 +++++++++++++++++-- src/replica/mutation_log.cpp | 13 +++++++++++++ src/replica/mutation_log.h | 5 +++++ src/replica/replica_chkpt.cpp | 1 + 4 files changed, 36 insertions(+), 2 deletions(-) diff --git a/src/replica/duplication/load_from_private_log.cpp b/src/replica/duplication/load_from_private_log.cpp index b0e501409f..73527b7ada 100644 --- a/src/replica/duplication/load_from_private_log.cpp +++ b/src/replica/duplication/load_from_private_log.cpp @@ -156,14 +156,29 @@ void load_from_private_log::find_log_file_to_start() // Reopen the files. Because the internal file handle of `file_map` // is cleared once WAL replay finished. They are unable to read. mutation_log::log_file_map_by_index new_file_map; - for (const auto &pr : file_map) { + decree cleanable_decree = _private_log->get_cleanable_decree(); + decree max_decree_gpid = _private_log->max_decree(get_gpid()); + if(max_decree_gpid <= cleanable_decree){ + LOG_ERROR_PREFIX("plog_file all error: max_decree_gpid {} , cleanable_decree {}", max_decree_gpid, cleanable_decree); + return; + } + + for (auto it = file_map.rbegin(); it != file_map.rend(); ++it) { log_file_ptr file; - error_s es = log_utils::open_read(pr.second->path(), file); + error_s es = log_utils::open_read(it->second->path(), file); if (!es.is_ok()) { LOG_ERROR_PREFIX("{}", es); return; } new_file_map.emplace(pr.first, file); + + // next file map may can not open + gpid pid = get_gpid(); + decree previous_log_max_decree = file->previous_log_max_decree(pid); + // these plog_file has possible be deleted do not open_read next plog_file , otherwise it may coredump + if(previous_log_max_decree <= cleanable_decree){ + break ; + } } find_log_file_to_start(std::move(new_file_map)); diff --git a/src/replica/mutation_log.cpp b/src/replica/mutation_log.cpp index df3ca2e6b2..23b78efe36 100644 --- a/src/replica/mutation_log.cpp +++ b/src/replica/mutation_log.cpp @@ -361,6 +361,7 @@ void mutation_log::init_states() _private_log_info = {0, 0}; _plog_max_decree_on_disk = 0; _plog_max_commit_on_disk = 0; + _cleanable_decree = 0; } error_code mutation_log::open(replay_callback read_callback, @@ -898,6 +899,18 @@ void mutation_log::update_max_commit_on_disk_no_lock(decree d) } } +decree mutation_log::get_cleanable_decree() const +{ + zauto_lock l(_lock); + return _cleanable_decree; +} + +void mutation_log::set_cleanable_decree(decree d) +{ + zauto_lock l(_lock); + _cleanable_decree = d; +} + bool mutation_log::get_learn_state(gpid gpid, decree start, /*out*/ learn_state &state) const { CHECK(_is_private, "this method is only valid for private logs"); diff --git a/src/replica/mutation_log.h b/src/replica/mutation_log.h index 7689004aae..3981b430dc 100644 --- a/src/replica/mutation_log.h +++ b/src/replica/mutation_log.h @@ -301,6 +301,9 @@ class mutation_log : public ref_counter task_tracker *tracker() { return &_tracker; } + decree get_cleanable_decree() const; + void set_cleanable_decree(decree target); + protected: // 'size' is data size to write; the '_global_end_offset' will be updated by 'size'. // can switch file only when create_new_log_if_needed = true; @@ -400,6 +403,8 @@ class mutation_log : public ref_counter // for plog. Since it is set with mutation.data.header.last_committed_decree, it must // be less than _plog_max_decree_on_disk. decree _plog_max_commit_on_disk; + + decree _cleanable_decree; // for deal with gc conflict }; typedef dsn::ref_ptr mutation_log_ptr; diff --git a/src/replica/replica_chkpt.cpp b/src/replica/replica_chkpt.cpp index b975626b61..ffeee67978 100644 --- a/src/replica/replica_chkpt.cpp +++ b/src/replica/replica_chkpt.cpp @@ -171,6 +171,7 @@ void replica::on_checkpoint_timer() } } + _private_log->set_cleanable_decree(cleanable_decree); tasking::enqueue(LPC_GARBAGE_COLLECT_LOGS_AND_REPLICAS, &_tracker, [this, plog, cleanable_decree, valid_start_offset] { From a440943f308951b2a914d5ce59030fae3cc56082 Mon Sep 17 00:00:00 2001 From: ninsmiracle Date: Tue, 9 Jul 2024 19:38:05 +0800 Subject: [PATCH 2/6] Pass IWYU --- src/replica/duplication/load_from_private_log.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/replica/duplication/load_from_private_log.cpp b/src/replica/duplication/load_from_private_log.cpp index 73527b7ada..11ba673794 100644 --- a/src/replica/duplication/load_from_private_log.cpp +++ b/src/replica/duplication/load_from_private_log.cpp @@ -22,6 +22,7 @@ #include #include "common/duplication_common.h" +#include "common/gpid.h" #include "duplication_types.h" #include "load_from_private_log.h" #include "replica/duplication/mutation_batch.h" @@ -170,7 +171,7 @@ void load_from_private_log::find_log_file_to_start() LOG_ERROR_PREFIX("{}", es); return; } - new_file_map.emplace(pr.first, file); + new_file_map.emplace(it->first, file); // next file map may can not open gpid pid = get_gpid(); From f26e448d4bad7e6507050c924db73fc0e9f32cf9 Mon Sep 17 00:00:00 2001 From: ninsmiracle Date: Tue, 9 Jul 2024 19:41:21 +0800 Subject: [PATCH 3/6] format code --- src/replica/duplication/load_from_private_log.cpp | 15 +++++++++------ src/replica/mutation_log.h | 2 +- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/src/replica/duplication/load_from_private_log.cpp b/src/replica/duplication/load_from_private_log.cpp index 11ba673794..65b5aabcb1 100644 --- a/src/replica/duplication/load_from_private_log.cpp +++ b/src/replica/duplication/load_from_private_log.cpp @@ -158,9 +158,11 @@ void load_from_private_log::find_log_file_to_start() // is cleared once WAL replay finished. They are unable to read. mutation_log::log_file_map_by_index new_file_map; decree cleanable_decree = _private_log->get_cleanable_decree(); - decree max_decree_gpid = _private_log->max_decree(get_gpid()); - if(max_decree_gpid <= cleanable_decree){ - LOG_ERROR_PREFIX("plog_file all error: max_decree_gpid {} , cleanable_decree {}", max_decree_gpid, cleanable_decree); + decree max_decree_gpid = _private_log->max_decree(get_gpid()); + if (max_decree_gpid <= cleanable_decree) { + LOG_ERROR_PREFIX("plog_file all error: max_decree_gpid {} , cleanable_decree {}", + max_decree_gpid, + cleanable_decree); return; } @@ -176,9 +178,10 @@ void load_from_private_log::find_log_file_to_start() // next file map may can not open gpid pid = get_gpid(); decree previous_log_max_decree = file->previous_log_max_decree(pid); - // these plog_file has possible be deleted do not open_read next plog_file , otherwise it may coredump - if(previous_log_max_decree <= cleanable_decree){ - break ; + // these plog_file has possible be deleted do not open_read next plog_file , otherwise it + // may coredump + if (previous_log_max_decree <= cleanable_decree) { + break; } } diff --git a/src/replica/mutation_log.h b/src/replica/mutation_log.h index 3981b430dc..d213d06b03 100644 --- a/src/replica/mutation_log.h +++ b/src/replica/mutation_log.h @@ -404,7 +404,7 @@ class mutation_log : public ref_counter // be less than _plog_max_decree_on_disk. decree _plog_max_commit_on_disk; - decree _cleanable_decree; // for deal with gc conflict + decree _cleanable_decree; // for deal with gc conflict }; typedef dsn::ref_ptr mutation_log_ptr; From 49f092832f5ef50d6986ca23b33f59faf2f4d296 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=83=AD=E5=AE=81=E6=B7=B1?= Date: Wed, 25 Sep 2024 15:51:26 +0800 Subject: [PATCH 4/6] update code with github comment --- src/replica/duplication/load_from_private_log.cpp | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/src/replica/duplication/load_from_private_log.cpp b/src/replica/duplication/load_from_private_log.cpp index 65b5aabcb1..f2d1c8b23b 100644 --- a/src/replica/duplication/load_from_private_log.cpp +++ b/src/replica/duplication/load_from_private_log.cpp @@ -159,14 +159,12 @@ void load_from_private_log::find_log_file_to_start() mutation_log::log_file_map_by_index new_file_map; decree cleanable_decree = _private_log->get_cleanable_decree(); decree max_decree_gpid = _private_log->max_decree(get_gpid()); - if (max_decree_gpid <= cleanable_decree) { - LOG_ERROR_PREFIX("plog_file all error: max_decree_gpid {} , cleanable_decree {}", - max_decree_gpid, - cleanable_decree); - return; - } + CHECK(max_decree_gpid > cleanable_decree, + "plog_file all error: max_decree_gpid {} , cleanable_decree {}", + max_decree_gpid, + cleanable_decree); - for (auto it = file_map.rbegin(); it != file_map.rend(); ++it) { + for (auto it = file_map.crbegin(); it != file_map.crend(); ++it) { log_file_ptr file; error_s es = log_utils::open_read(it->second->path(), file); if (!es.is_ok()) { @@ -175,10 +173,9 @@ void load_from_private_log::find_log_file_to_start() } new_file_map.emplace(it->first, file); - // next file map may can not open gpid pid = get_gpid(); decree previous_log_max_decree = file->previous_log_max_decree(pid); - // these plog_file has possible be deleted do not open_read next plog_file , otherwise it + // These plog file has possible be deleted do not open_read next plog file , otherwise it // may coredump if (previous_log_max_decree <= cleanable_decree) { break; From 7ae6d6b769dd6f9f5b880ca66f290422575cf00c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=83=AD=E5=AE=81=E6=B7=B1?= Date: Wed, 25 Sep 2024 16:01:15 +0800 Subject: [PATCH 5/6] small fix --- src/replica/duplication/load_from_private_log.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/replica/duplication/load_from_private_log.cpp b/src/replica/duplication/load_from_private_log.cpp index f2d1c8b23b..2a2d492580 100644 --- a/src/replica/duplication/load_from_private_log.cpp +++ b/src/replica/duplication/load_from_private_log.cpp @@ -160,7 +160,7 @@ void load_from_private_log::find_log_file_to_start() decree cleanable_decree = _private_log->get_cleanable_decree(); decree max_decree_gpid = _private_log->max_decree(get_gpid()); CHECK(max_decree_gpid > cleanable_decree, - "plog_file all error: max_decree_gpid {} , cleanable_decree {}", + "plog files all error: max_decree_gpid {} , cleanable_decree {}", max_decree_gpid, cleanable_decree); @@ -175,8 +175,8 @@ void load_from_private_log::find_log_file_to_start() gpid pid = get_gpid(); decree previous_log_max_decree = file->previous_log_max_decree(pid); - // These plog file has possible be deleted do not open_read next plog file , otherwise it - // may coredump + // These plog files has possible be deleted do not open_read() next plog file , otherwise it + // may coredump. if (previous_log_max_decree <= cleanable_decree) { break; } From f030a9ea9d030fb7c5fbe71d86082b342422d600 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=83=AD=E5=AE=81=E6=B7=B1?= Date: Wed, 25 Sep 2024 16:03:29 +0800 Subject: [PATCH 6/6] make english comment better --- src/replica/mutation_log.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/replica/mutation_log.h b/src/replica/mutation_log.h index d213d06b03..b1d3c8e6a0 100644 --- a/src/replica/mutation_log.h +++ b/src/replica/mutation_log.h @@ -404,7 +404,7 @@ class mutation_log : public ref_counter // be less than _plog_max_decree_on_disk. decree _plog_max_commit_on_disk; - decree _cleanable_decree; // for deal with gc conflict + decree _cleanable_decree; // To deal with gc conflict }; typedef dsn::ref_ptr mutation_log_ptr;