Skip to content

Commit ceeb679

Browse files
committed
Fixed initialization of signed_num in new views; fixed default ports
Another contributor to the race condition in handle_verify_request is the fact that the new-view initialization of the SST leaves all of the delivery-counter fields (like persisted_num) at their default values, assuming the first delivery predicate will update them, but handle_verify_request can read from a remote node's signed_num before that node has written to it. It's safer to initialize signed_num and the corresponding signature to their correct values from the last view. Like persisted_num, verified_num is written before it's read, so it doesn't need to be initialized. Separately, the default values for the Derecho ports cause problems when trying to test locally on Linux machines because they overlap with the Linux ephemeral port range (32768 and higher). Tests keep failing because a process accidentally connects to itself, or to another node's system-assigned outgoing port for a different connection. It's better to use lower values for these ports so they won't overlap with the ephemeral range, and these values are still high enough to be unused by any other service.
1 parent b7e401f commit ceeb679

7 files changed

Lines changed: 37 additions & 28 deletions

File tree

include/derecho/conf/conf.hpp

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -83,16 +83,16 @@ class Conf {
8383
std::map<const std::string, std::string> config = {
8484
// [DERECHO]
8585
{DERECHO_CONTACT_IP, "127.0.0.1"},
86-
{DERECHO_CONTACT_PORT, "23580"},
86+
{DERECHO_CONTACT_PORT, "14480"},
8787
{DERECHO_RESTART_LEADERS, "127.0.0.1"},
88-
{DERECHO_RESTART_LEADER_PORTS, "23580"},
88+
{DERECHO_RESTART_LEADER_PORTS, "14480"},
8989
{DERECHO_LOCAL_ID, "0"},
9090
{DERECHO_LOCAL_IP, "127.0.0.1"},
91-
{DERECHO_GMS_PORT, "23580"},
92-
{DERECHO_STATE_TRANSFER_PORT, "28366"},
93-
{DERECHO_SST_PORT, "37683"},
94-
{DERECHO_RDMC_PORT, "31675"},
95-
{DERECHO_EXTERNAL_PORT, "32645"},
91+
{DERECHO_GMS_PORT, "14480"},
92+
{DERECHO_STATE_TRANSFER_PORT, "14560"},
93+
{DERECHO_SST_PORT, "14660"},
94+
{DERECHO_RDMC_PORT, "14720"},
95+
{DERECHO_EXTERNAL_PORT, "14880"},
9696
{SUBGROUP_DEFAULT_RDMC_SEND_ALGORITHM, "binomial_send"},
9797
{DERECHO_P2P_LOOP_BUSY_WAIT_BEFORE_SLEEP_MS, "250"},
9898
{DERECHO_SST_POLL_CQ_TIMEOUT_MS, "2000"},
@@ -126,7 +126,7 @@ class Conf {
126126
{LOGGER_DEFAULT_LOG_NAME, "derecho_debug"},
127127
{LOGGER_DEFAULT_LOG_LEVEL, "debug"},
128128
{LOGGER_LOG_TO_TERMINAL, "true"},
129-
{LOGGER_LOG_FILE_DEPTH, "3"}};
129+
{LOGGER_LOG_FILE_DEPTH, "10"}};
130130

131131
public:
132132
// the option for parsing command line with getopt(not GetPot!!!)

include/derecho/core/detail/derecho_sst.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -389,7 +389,7 @@ void set(volatile char* string_array, const std::string& value);
389389

390390
void increment(volatile int& member);
391391

392-
bool equals(const volatile char& string_array, const std::string& value);
392+
bool equals(const volatile char* string_array, const std::string& value);
393393

394394
} // namespace gmssst
395395

src/conf/derecho-sample.cfg

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2,21 +2,21 @@
22
# contact ip - the active leader's ip address
33
contact_ip = 127.0.0.1
44
# contact port - the active leader's gms port
5-
contact_port = 23580
5+
contact_port = 14480
66
# list of leaders to contact during a restart in priority order
77
restart_leaders = 127.0.0.1,127.0.0.1
88
# list of GMS ports of the restart leaders, in the same order
9-
restart_leader_ports = 23580,23581
9+
restart_leader_ports = 14480,14481
1010
# derecho gms port
11-
gms_port = 23580
11+
gms_port = 14480
1212
# derecho state-transfer port
13-
state_transfer_port = 28366
13+
state_transfer_port = 14560
1414
# sst tcp port
15-
sst_port = 37683
15+
sst_port = 14660
1616
# rdmc tcp port
17-
rdmc_port = 31675
17+
rdmc_port = 14720
1818
# external tcp port listening to external clients
19-
external_port = 32645
19+
external_port = 14880
2020
# Maximum possible node ID value
2121
# Node IDs are 32-bit integers, but all Derecho systems will have
2222
# many fewer nodes than this. Derecho will pre-allocate space for a
@@ -145,8 +145,8 @@ persistence_log_level = info
145145
# Whether logs should be printed to the terminal as well as saved to files (default is true)
146146
log_to_terminal = true
147147
# The number of older log files to save. Log files are rotated automatically
148-
# when the current one reaches 1MB in size. Default is 3.
149-
log_file_depth = 3
148+
# when the current one reaches 1MB in size. Default is 10.
149+
log_file_depth = 10
150150

151151
# optional layout configurations
152152
[LAYOUT]

src/conf/derecho_node-sample.cfg

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6,15 +6,15 @@ local_ip = 127.0.0.1
66
# These ports are optional: nodes will use the values from the group derecho.cfg by default,
77
# but if the port options are specified here they will override the defaults.
88
# derecho gms port
9-
gms_port = 23580
9+
gms_port = 14481
1010
# derecho state-transfer port
11-
state_transfer_port = 28366
11+
state_transfer_port = 14561
1212
# sst tcp port
13-
sst_port = 37683
13+
sst_port = 14661
1414
# rdmc tcp port
15-
rdmc_port = 31675
15+
rdmc_port = 14721
1616
# external tcp port listening to external clients
17-
external_port = 32645
17+
external_port = 14881
1818

1919

2020
# RDMA section contains configurations of the following

src/core/derecho_sst.cpp

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ void DerechoSST::init_local_row_from_previous(const DerechoSST& old_sst, const i
3333
memcpy(const_cast<uint16_t*>(joiner_external_ports[local_row]),
3434
const_cast<const uint16_t*>(old_sst.joiner_external_ports[row] + num_changes_installed),
3535
(old_sst.joiner_external_ports.size() - num_changes_installed) * sizeof(uint16_t));
36-
//TODO: Copy over the last committed signature here? Or will the new view start with no signatures?
36+
// Initialize these flags to false
3737
for(size_t i = 0; i < suspected.size(); ++i) {
3838
suspected[local_row][i] = false;
3939
}
@@ -43,11 +43,20 @@ void DerechoSST::init_local_row_from_previous(const DerechoSST& old_sst, const i
4343
for(size_t i = 0; i < global_min.size(); ++i) {
4444
global_min[local_row][i] = 0;
4545
}
46+
// Initialize these counters with their previous values, except num_installed gets incremented
4647
num_changes[local_row] = old_sst.num_changes[row];
4748
num_committed[local_row] = old_sst.num_committed[row];
4849
num_acked[local_row] = old_sst.num_acked[row];
4950
num_installed[local_row] = old_sst.num_installed[row] + num_changes_installed;
5051
wedged[local_row] = false;
52+
// Copy over the previous view's last known signature and signed_num array
53+
// Unlike seq_num and persisted_num, these may get read by other nodes before they are updated in the new view
54+
memcpy(const_cast<persistent::version_t*>(signed_num[local_row]),
55+
const_cast<const persistent::version_t*>(old_sst.signed_num[row]),
56+
old_sst.signed_num.size() * sizeof(persistent::version_t));
57+
memcpy(const_cast<uint8_t*>(signatures[local_row]),
58+
const_cast<const uint8_t*>(old_sst.signatures[row]),
59+
old_sst.signatures.size() * sizeof(uint8_t));
5160
}
5261

5362
void DerechoSST::init_local_change_proposals(const int other_row) {

src/core/git_version.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,8 @@ namespace derecho {
1313
const int MAJOR_VERSION = 2;
1414
const int MINOR_VERSION = 4;
1515
const int PATCH_VERSION = 1;
16-
const int COMMITS_AHEAD_OF_VERSION = 10;
16+
const int COMMITS_AHEAD_OF_VERSION = 11;
1717
const char* VERSION_STRING = "2.4.1";
18-
const char* VERSION_STRING_PLUS_COMMITS = "2.4.1+10";
18+
const char* VERSION_STRING_PLUS_COMMITS = "2.4.1+11";
1919

2020
}

src/core/view_manager.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1484,7 +1484,7 @@ void ViewManager::deliver_ragged_trim(DerechoSST& gmsSST) {
14841484
gmsSST.persisted_num[member_row][subgroup_id])
14851485
.second
14861486
< last_delivered_seq_num) {
1487-
dbg_debug(vm_logger, "Waiting for node {} to finish persisting update {}", shard_member, last_delivered_seq_num);
1487+
dbg_trace(vm_logger, "Waiting for node {} to finish persisting update {}", shard_member, last_delivered_seq_num);
14881488
return false;
14891489
}
14901490
}
@@ -1639,7 +1639,6 @@ void ViewManager::finish_view_change(DerechoSST& gmsSST) {
16391639
// This will block until everyone responds to SST/RDMC initial handshakes
16401640
transition_multicast_group(next_subgroup_settings, new_num_received_size, new_slot_size, new_index_field_size);
16411641
dbg_debug(vm_logger, "Done setting up SST and MulticastGroup for view {}; about to do a sync_with_members()", next_view->vid);
1642-
dbg_trace(vm_logger, "My row in new SST initialized to: {}", next_view->gmsSST->to_string());
16431642

16441643
// New members can now proceed to view_manager.finish_setup(), which will call put() and sync()
16451644
next_view->gmsSST->push_row_except_slots();
@@ -1650,6 +1649,7 @@ void ViewManager::finish_view_change(DerechoSST& gmsSST) {
16501649
old_views_cv.notify_all();
16511650
}
16521651
curr_view = std::move(next_view);
1652+
dbg_trace(vm_logger, "New SST in view {}: {}", curr_view->vid, curr_view->gmsSST->to_string());
16531653

16541654
if(any_persistent_objects) {
16551655
// Write the new view to disk before using it

0 commit comments

Comments
 (0)