зеркало из https://github.com/microsoft/CCF.git
Retain all signatures during a soft rollback (#5749)
This commit is contained in:
Родитель
c95948a202
Коммит
27b44fac44
|
@ -157,7 +157,6 @@ namespace aft
|
|||
ccf::View current_view = 0;
|
||||
kv::Version last_idx = 0;
|
||||
kv::Version commit_idx = 0;
|
||||
kv::Version watermark_idx = 0;
|
||||
ViewHistory view_history;
|
||||
kv::Version new_view_idx = 0;
|
||||
|
||||
|
@ -183,7 +182,6 @@ namespace aft
|
|||
current_view,
|
||||
last_idx,
|
||||
commit_idx,
|
||||
watermark_idx,
|
||||
new_view_idx,
|
||||
leadership_state,
|
||||
membership_state);
|
||||
|
|
|
@ -115,13 +115,6 @@ namespace aft
|
|||
|
||||
std::optional<kv::RetirementPhase> retirement_phase = std::nullopt;
|
||||
std::chrono::milliseconds timeout_elapsed;
|
||||
// Last (committable) index preceding the node's election, this is
|
||||
// used to decide when to start issuing signatures. While commit_idx
|
||||
// hasn't caught up with election_index, a newly elected leader is
|
||||
// effectively finishing establishing commit over the previous term
|
||||
// or even previous terms, and can therefore not meaningfully sign
|
||||
// over the commit level.
|
||||
kv::Version election_index = 0;
|
||||
|
||||
// When this node receives append entries from a new primary, it may need to
|
||||
// roll back a committable but uncommitted suffix it holds. The
|
||||
|
@ -314,6 +307,33 @@ namespace aft
|
|||
committable_indices.back();
|
||||
}
|
||||
|
||||
// Returns the highest committable index which is not greater than the
|
||||
// given idx.
|
||||
std::optional<Index> find_highest_possible_committable_index(
|
||||
Index idx) const
|
||||
{
|
||||
const auto it = std::upper_bound(
|
||||
committable_indices.rbegin(),
|
||||
committable_indices.rend(),
|
||||
idx,
|
||||
[](const auto& l, const auto& r) { return l >= r; });
|
||||
if (it == committable_indices.rend())
|
||||
{
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
return *it;
|
||||
}
|
||||
|
||||
void compact_committable_indices(Index idx)
|
||||
{
|
||||
while (!committable_indices.empty() &&
|
||||
(committable_indices.front() <= idx))
|
||||
{
|
||||
committable_indices.pop_front();
|
||||
}
|
||||
}
|
||||
|
||||
void enable_all_domains() override
|
||||
{
|
||||
// When receiving append entries as a follower, all security domains will
|
||||
|
@ -405,12 +425,6 @@ namespace aft
|
|||
return {get_term_internal(commit_idx), commit_idx};
|
||||
}
|
||||
|
||||
ccf::SeqNo get_previous_committable_seqno() override
|
||||
{
|
||||
std::lock_guard<ccf::pal::Mutex> guard(state->lock);
|
||||
return last_committable_index();
|
||||
}
|
||||
|
||||
Term get_view(Index idx) override
|
||||
{
|
||||
std::lock_guard<ccf::pal::Mutex> guard(state->lock);
|
||||
|
@ -1815,17 +1829,7 @@ namespace aft
|
|||
return;
|
||||
}
|
||||
|
||||
// When we force to become the primary we are going around the
|
||||
// consensus protocol. This only happens when a node starts a new network
|
||||
// and has a genesis or recovery tx as the last transaction
|
||||
election_index = last_committable_index();
|
||||
|
||||
// A newly elected leader must not advance the commit index until a
|
||||
// transaction in the new term commits. We achieve this by clearing our
|
||||
// list committable indices - so nothing from a previous term is now
|
||||
// considered committable. Instead this new primary will shortly produce
|
||||
// their own signature, which _will_ be considered committable.
|
||||
committable_indices.clear();
|
||||
const auto election_index = last_committable_index();
|
||||
|
||||
RAFT_DEBUG_FMT(
|
||||
"Election index is {} in term {}", election_index, state->current_view);
|
||||
|
@ -1897,6 +1901,9 @@ namespace aft
|
|||
restart_election_timeout();
|
||||
reset_last_ack_timeouts();
|
||||
|
||||
// Drop anything unsigned here, but retain all signed entries. Only do a
|
||||
// more aggressive rollback, potentially including signatures, when
|
||||
// receiving a conflicting AppendEntries
|
||||
rollback(last_committable_index());
|
||||
|
||||
if (can_endorse_primary())
|
||||
|
@ -2074,12 +2081,18 @@ namespace aft
|
|||
}
|
||||
}
|
||||
|
||||
// If there exists some committable idx in the current term such that idx >
|
||||
// commit_idx and a majority of nodes have replicated it, commit to that
|
||||
// idx.
|
||||
void update_commit()
|
||||
{
|
||||
// If there exists some idx in the current term such that
|
||||
// idx > commit_idx and a majority of nodes have replicated it,
|
||||
// commit to that idx.
|
||||
auto new_commit_idx = std::numeric_limits<Index>::max();
|
||||
if (state->leadership_state != kv::LeadershipState::Leader)
|
||||
{
|
||||
throw std::logic_error(
|
||||
"update_commit() must only be called while this node is leader");
|
||||
}
|
||||
|
||||
std::optional<Index> new_agreement_index = std::nullopt;
|
||||
|
||||
// Obtain CFT watermarks
|
||||
for (auto const& c : configurations)
|
||||
|
@ -2104,31 +2117,55 @@ namespace aft
|
|||
sort(match.begin(), match.end());
|
||||
auto confirmed = match.at((match.size() - 1) / 2);
|
||||
|
||||
if (confirmed < new_commit_idx)
|
||||
if (
|
||||
!new_agreement_index.has_value() ||
|
||||
confirmed < new_agreement_index.value())
|
||||
{
|
||||
new_commit_idx = confirmed;
|
||||
new_agreement_index = confirmed;
|
||||
}
|
||||
}
|
||||
RAFT_DEBUG_FMT(
|
||||
"In update_commit, new_commit_idx: {}, "
|
||||
"last_idx: {}",
|
||||
new_commit_idx,
|
||||
state->last_idx);
|
||||
|
||||
if (new_commit_idx != std::numeric_limits<Index>::max())
|
||||
if (new_agreement_index.has_value())
|
||||
{
|
||||
state->watermark_idx = new_commit_idx;
|
||||
}
|
||||
if (new_agreement_index.value() > state->last_idx)
|
||||
{
|
||||
throw std::logic_error(
|
||||
"Followers appear to have later match indices than leader");
|
||||
}
|
||||
|
||||
if (get_commit_watermark_idx() > state->last_idx)
|
||||
{
|
||||
throw std::logic_error(
|
||||
"Followers appear to have later match indices than leader");
|
||||
}
|
||||
const auto new_commit_idx =
|
||||
find_highest_possible_committable_index(new_agreement_index.value());
|
||||
|
||||
commit_if_possible(get_commit_watermark_idx());
|
||||
if (new_commit_idx.has_value())
|
||||
{
|
||||
RAFT_DEBUG_FMT(
|
||||
"In update_commit, new_commit_idx: {}, "
|
||||
"last_idx: {}",
|
||||
new_commit_idx.value(),
|
||||
state->last_idx);
|
||||
|
||||
const auto term_of_new = get_term_internal(new_commit_idx.value());
|
||||
if (term_of_new == state->current_view)
|
||||
{
|
||||
commit(new_commit_idx.value());
|
||||
}
|
||||
else
|
||||
{
|
||||
RAFT_DEBUG_FMT(
|
||||
"Ack quorum at {} resulted in proposed commit index {}, which "
|
||||
"is in term {}. Waiting for agreement on committable entry in "
|
||||
"current term {} to update commit",
|
||||
new_agreement_index.value(),
|
||||
new_commit_idx.value(),
|
||||
term_of_new,
|
||||
state->current_view);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Commits at the highest committable index which is not greater than the
|
||||
// given idx.
|
||||
void commit_if_possible(Index idx)
|
||||
{
|
||||
RAFT_DEBUG_FMT(
|
||||
|
@ -2140,19 +2177,11 @@ namespace aft
|
|||
(idx > state->commit_idx) &&
|
||||
(get_term_internal(idx) <= state->current_view))
|
||||
{
|
||||
Index highest_committable = 0;
|
||||
bool can_commit = false;
|
||||
while (!committable_indices.empty() &&
|
||||
(committable_indices.front() <= idx))
|
||||
const auto highest_committable =
|
||||
find_highest_possible_committable_index(idx);
|
||||
if (highest_committable.has_value())
|
||||
{
|
||||
highest_committable = committable_indices.front();
|
||||
committable_indices.pop_front();
|
||||
can_commit = true;
|
||||
}
|
||||
|
||||
if (can_commit)
|
||||
{
|
||||
commit(highest_committable);
|
||||
commit(highest_committable.value());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -2177,6 +2206,8 @@ namespace aft
|
|||
if (idx <= state->commit_idx)
|
||||
return;
|
||||
|
||||
compact_committable_indices(idx);
|
||||
|
||||
state->commit_idx = idx;
|
||||
if (
|
||||
is_retired() && retirement_phase == kv::RetirementPhase::Signed &&
|
||||
|
@ -2241,11 +2272,6 @@ namespace aft
|
|||
}
|
||||
}
|
||||
|
||||
Index get_commit_watermark_idx()
|
||||
{
|
||||
return state->watermark_idx;
|
||||
}
|
||||
|
||||
bool is_self_in_latest_config()
|
||||
{
|
||||
bool present = false;
|
||||
|
|
|
@ -459,8 +459,6 @@ namespace kv
|
|||
virtual bool replicate(const BatchVector& entries, ccf::View view) = 0;
|
||||
virtual std::pair<ccf::View, ccf::SeqNo> get_committed_txid() = 0;
|
||||
|
||||
virtual ccf::SeqNo get_previous_committable_seqno() = 0;
|
||||
|
||||
virtual ccf::View get_view(ccf::SeqNo seqno) = 0;
|
||||
virtual ccf::View get_view() = 0;
|
||||
virtual std::vector<ccf::SeqNo> get_view_history(
|
||||
|
|
|
@ -166,14 +166,6 @@ namespace kv::test
|
|||
return {committed_txid.view, committed_txid.seqno};
|
||||
}
|
||||
|
||||
ccf::SeqNo get_previous_committable_seqno() override
|
||||
{
|
||||
// Since we commit instantly in this stub, we do not distinguish
|
||||
// committable from committed. The last committable thing we saw was
|
||||
// immediately committed, and we store it in committed_txid.
|
||||
return committed_txid.seqno;
|
||||
}
|
||||
|
||||
ccf::SeqNo get_committed_seqno() override
|
||||
{
|
||||
return committed_txid.seqno;
|
||||
|
|
|
@ -72,7 +72,10 @@ state_all
|
|||
assert_is_primary,2
|
||||
|
||||
# New primary appends a new committable entry to confirm its primaryship
|
||||
replicate,2,ConfirmCommit
|
||||
emit_signature,2
|
||||
|
||||
periodic_all,10
|
||||
dispatch_all
|
||||
|
||||
periodic_all,10
|
||||
dispatch_all
|
||||
|
|
|
@ -0,0 +1,84 @@
|
|||
# Fully-connected 3-node network
|
||||
nodes,0,1,2
|
||||
connect,0,1
|
||||
connect,1,2
|
||||
connect,0,2
|
||||
|
||||
# Node 0 starts first, calls and wins an election
|
||||
periodic_one,0,110
|
||||
dispatch_all
|
||||
|
||||
periodic_all,10
|
||||
dispatch_all
|
||||
|
||||
state_all
|
||||
assert_is_primary,0
|
||||
|
||||
# Node 0 emits an entry, and signature
|
||||
replicate,1,hello world
|
||||
emit_signature,1
|
||||
periodic_all,10
|
||||
|
||||
# But only succeeds in replicating this to node 1
|
||||
drop_pending_to,0,2
|
||||
dispatch_all
|
||||
|
||||
# NB: This dispatch includes the response, so node 0 has advanced commit
|
||||
# This response could arrive much later, since 0 and 1 do not need to communicate further
|
||||
state_all
|
||||
assert_commit_safety,0
|
||||
|
||||
# Disconnect node 0. Not strictly required, but simplifies work going forwards
|
||||
disconnect_node,0
|
||||
|
||||
# Now node 1 calls an election, which it wins with the support of node 2
|
||||
periodic_one,1,110
|
||||
dispatch_all
|
||||
|
||||
# Node 1 may do work in this term, but crucially doesn't get as far as emitting a signature
|
||||
replicate,2,saluton mondo
|
||||
replicate,2,ah well nevertheless
|
||||
state_all
|
||||
|
||||
# Node 2 calls an election, advancing to term 3
|
||||
periodic_one,2,110
|
||||
|
||||
# Node 2's RequestVote reaches Node 1, causing a soft-rollback
|
||||
dispatch_one,2
|
||||
|
||||
# Node 1 should retain the committed state!
|
||||
state_all
|
||||
assert_commit_safety,0
|
||||
|
||||
# Node 1 should not vote for node 2!
|
||||
dispatch_one,1
|
||||
assert_isnot_primary,2
|
||||
|
||||
# Because if they did, then node 2's AEs would break persistence!
|
||||
periodic_all,10
|
||||
dispatch_all
|
||||
state_all
|
||||
assert_commit_safety,0
|
||||
|
||||
# Now restore comms between 0 and 1, and disconnect 2
|
||||
reconnect_node,0
|
||||
disconnect_node,2
|
||||
|
||||
# Despite not being able to _commit_ the entries it holds, node 1
|
||||
# should be able to win an election by advertising them!
|
||||
periodic_one,1,110
|
||||
dispatch_all
|
||||
state_all
|
||||
assert_is_primary,1
|
||||
|
||||
# Regardless of earlier disagreements, we can all agree now
|
||||
reconnect_node,2
|
||||
replicate,4,place trace checker
|
||||
emit_signature,4
|
||||
periodic_all,10
|
||||
dispatch_all
|
||||
periodic_all,10
|
||||
dispatch_all
|
||||
|
||||
state_all
|
||||
assert_state_sync
|
|
@ -622,14 +622,13 @@ BecomeLeader(i) ==
|
|||
\* all unsigned log entries of the previous leader.
|
||||
\* See occurrence of last_committable_index() in raft.h::become_leader.
|
||||
/\ log' = [log EXCEPT ![i] = SubSeq(log[i],1, MaxCommittableIndex(log[i]))]
|
||||
/\ committableIndices' = [committableIndices EXCEPT ![i] = {}]
|
||||
\* Reset our nextIndex to the end of the *new* log.
|
||||
/\ nextIndex' = [nextIndex EXCEPT ![i] = [j \in Servers |-> Len(log'[i]) + 1]]
|
||||
/\ matchIndex' = [matchIndex EXCEPT ![i] = [j \in Servers |-> 0]]
|
||||
\* Shorten the configurations if the removed txs contained reconfigurations
|
||||
/\ configurations' = [configurations EXCEPT ![i] = ConfigurationsToIndex(i, Len(log'[i]))]
|
||||
/\ UNCHANGED <<reconfigurationCount, removedFromConfiguration, messageVars, currentTerm, votedFor,
|
||||
candidateVars, commitIndex>>
|
||||
candidateVars, commitIndex, committableIndices>>
|
||||
|
||||
\* Leader i receives a client request to add v to the log.
|
||||
ClientRequest(i) ==
|
||||
|
@ -708,6 +707,7 @@ AdvanceCommitIndex(i) ==
|
|||
\* We want to get the smallest such index forward that is a signature
|
||||
\* This index must be from the current term,
|
||||
\* as explained by Figure 8 and Section 5.4.2 of https://raft.github.io/raft.pdf
|
||||
\* See find_highest_possible_committable_index in raft.h
|
||||
new_index == SelectInSubSeq(log[i], commitIndex[i]+1, Len(log[i]),
|
||||
LAMBDA e : e.contentType = TypeSignature /\ e.term = currentTerm[i])
|
||||
new_log ==
|
||||
|
|
Загрузка…
Ссылка в новой задаче