Retain all signatures during a soft rollback (#5749)

This commit is contained in:
Eddy Ashton 2023-10-25 12:24:35 +01:00 коммит произвёл GitHub
Родитель c95948a202
Коммит 27b44fac44
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
7 изменённых файлов: 177 добавлений и 76 удалений

Просмотреть файл

@ -157,7 +157,6 @@ namespace aft
ccf::View current_view = 0;
kv::Version last_idx = 0;
kv::Version commit_idx = 0;
kv::Version watermark_idx = 0;
ViewHistory view_history;
kv::Version new_view_idx = 0;
@ -183,7 +182,6 @@ namespace aft
current_view,
last_idx,
commit_idx,
watermark_idx,
new_view_idx,
leadership_state,
membership_state);

Просмотреть файл

@ -115,13 +115,6 @@ namespace aft
std::optional<kv::RetirementPhase> retirement_phase = std::nullopt;
std::chrono::milliseconds timeout_elapsed;
// Last (committable) index preceding the node's election, this is
// used to decide when to start issuing signatures. While commit_idx
// hasn't caught up with election_index, a newly elected leader is
// effectively finishing establishing commit over the previous term
// or even previous terms, and can therefore not meaningfully sign
// over the commit level.
kv::Version election_index = 0;
// When this node receives append entries from a new primary, it may need to
// roll back a committable but uncommitted suffix it holds. The
@ -314,6 +307,33 @@ namespace aft
committable_indices.back();
}
// Returns the highest committable index which is not greater than the
// given idx.
std::optional<Index> find_highest_possible_committable_index(
Index idx) const
{
const auto it = std::upper_bound(
committable_indices.rbegin(),
committable_indices.rend(),
idx,
[](const auto& l, const auto& r) { return l >= r; });
if (it == committable_indices.rend())
{
return std::nullopt;
}
return *it;
}
void compact_committable_indices(Index idx)
{
while (!committable_indices.empty() &&
(committable_indices.front() <= idx))
{
committable_indices.pop_front();
}
}
void enable_all_domains() override
{
// When receiving append entries as a follower, all security domains will
@ -405,12 +425,6 @@ namespace aft
return {get_term_internal(commit_idx), commit_idx};
}
ccf::SeqNo get_previous_committable_seqno() override
{
std::lock_guard<ccf::pal::Mutex> guard(state->lock);
return last_committable_index();
}
Term get_view(Index idx) override
{
std::lock_guard<ccf::pal::Mutex> guard(state->lock);
@ -1815,17 +1829,7 @@ namespace aft
return;
}
// When we force to become the primary we are going around the
// consensus protocol. This only happens when a node starts a new network
// and has a genesis or recovery tx as the last transaction
election_index = last_committable_index();
// A newly elected leader must not advance the commit index until a
// transaction in the new term commits. We achieve this by clearing our
// list committable indices - so nothing from a previous term is now
// considered committable. Instead this new primary will shortly produce
// their own signature, which _will_ be considered committable.
committable_indices.clear();
const auto election_index = last_committable_index();
RAFT_DEBUG_FMT(
"Election index is {} in term {}", election_index, state->current_view);
@ -1897,6 +1901,9 @@ namespace aft
restart_election_timeout();
reset_last_ack_timeouts();
// Drop anything unsigned here, but retain all signed entries. Only do a
// more aggressive rollback, potentially including signatures, when
// receiving a conflicting AppendEntries
rollback(last_committable_index());
if (can_endorse_primary())
@ -2074,12 +2081,18 @@ namespace aft
}
}
// If there exists some committable idx in the current term such that idx >
// commit_idx and a majority of nodes have replicated it, commit to that
// idx.
void update_commit()
{
// If there exists some idx in the current term such that
// idx > commit_idx and a majority of nodes have replicated it,
// commit to that idx.
auto new_commit_idx = std::numeric_limits<Index>::max();
if (state->leadership_state != kv::LeadershipState::Leader)
{
throw std::logic_error(
"update_commit() must only be called while this node is leader");
}
std::optional<Index> new_agreement_index = std::nullopt;
// Obtain CFT watermarks
for (auto const& c : configurations)
@ -2104,31 +2117,55 @@ namespace aft
sort(match.begin(), match.end());
auto confirmed = match.at((match.size() - 1) / 2);
if (confirmed < new_commit_idx)
if (
!new_agreement_index.has_value() ||
confirmed < new_agreement_index.value())
{
new_commit_idx = confirmed;
new_agreement_index = confirmed;
}
}
RAFT_DEBUG_FMT(
"In update_commit, new_commit_idx: {}, "
"last_idx: {}",
new_commit_idx,
state->last_idx);
if (new_commit_idx != std::numeric_limits<Index>::max())
if (new_agreement_index.has_value())
{
state->watermark_idx = new_commit_idx;
}
if (new_agreement_index.value() > state->last_idx)
{
throw std::logic_error(
"Followers appear to have later match indices than leader");
}
if (get_commit_watermark_idx() > state->last_idx)
{
throw std::logic_error(
"Followers appear to have later match indices than leader");
}
const auto new_commit_idx =
find_highest_possible_committable_index(new_agreement_index.value());
commit_if_possible(get_commit_watermark_idx());
if (new_commit_idx.has_value())
{
RAFT_DEBUG_FMT(
"In update_commit, new_commit_idx: {}, "
"last_idx: {}",
new_commit_idx.value(),
state->last_idx);
const auto term_of_new = get_term_internal(new_commit_idx.value());
if (term_of_new == state->current_view)
{
commit(new_commit_idx.value());
}
else
{
RAFT_DEBUG_FMT(
"Ack quorum at {} resulted in proposed commit index {}, which "
"is in term {}. Waiting for agreement on committable entry in "
"current term {} to update commit",
new_agreement_index.value(),
new_commit_idx.value(),
term_of_new,
state->current_view);
}
}
}
}
// Commits at the highest committable index which is not greater than the
// given idx.
void commit_if_possible(Index idx)
{
RAFT_DEBUG_FMT(
@ -2140,19 +2177,11 @@ namespace aft
(idx > state->commit_idx) &&
(get_term_internal(idx) <= state->current_view))
{
Index highest_committable = 0;
bool can_commit = false;
while (!committable_indices.empty() &&
(committable_indices.front() <= idx))
const auto highest_committable =
find_highest_possible_committable_index(idx);
if (highest_committable.has_value())
{
highest_committable = committable_indices.front();
committable_indices.pop_front();
can_commit = true;
}
if (can_commit)
{
commit(highest_committable);
commit(highest_committable.value());
}
}
}
@ -2177,6 +2206,8 @@ namespace aft
if (idx <= state->commit_idx)
return;
compact_committable_indices(idx);
state->commit_idx = idx;
if (
is_retired() && retirement_phase == kv::RetirementPhase::Signed &&
@ -2241,11 +2272,6 @@ namespace aft
}
}
Index get_commit_watermark_idx()
{
return state->watermark_idx;
}
bool is_self_in_latest_config()
{
bool present = false;

Просмотреть файл

@ -459,8 +459,6 @@ namespace kv
virtual bool replicate(const BatchVector& entries, ccf::View view) = 0;
virtual std::pair<ccf::View, ccf::SeqNo> get_committed_txid() = 0;
virtual ccf::SeqNo get_previous_committable_seqno() = 0;
virtual ccf::View get_view(ccf::SeqNo seqno) = 0;
virtual ccf::View get_view() = 0;
virtual std::vector<ccf::SeqNo> get_view_history(

Просмотреть файл

@ -166,14 +166,6 @@ namespace kv::test
return {committed_txid.view, committed_txid.seqno};
}
ccf::SeqNo get_previous_committable_seqno() override
{
// Since we commit instantly in this stub, we do not distinguish
// committable from committed. The last committable thing we saw was
// immediately committed, and we store it in committed_txid.
return committed_txid.seqno;
}
ccf::SeqNo get_committed_seqno() override
{
return committed_txid.seqno;

Просмотреть файл

@ -72,7 +72,10 @@ state_all
assert_is_primary,2
# New primary appends a new committable entry to confirm its primaryship
replicate,2,ConfirmCommit
emit_signature,2
periodic_all,10
dispatch_all
periodic_all,10
dispatch_all

Просмотреть файл

@ -0,0 +1,84 @@
# Fully-connected 3-node network
nodes,0,1,2
connect,0,1
connect,1,2
connect,0,2
# Node 0 starts first, calls and wins an election
periodic_one,0,110
dispatch_all
periodic_all,10
dispatch_all
state_all
assert_is_primary,0
# Node 0 emits an entry, and signature
replicate,1,hello world
emit_signature,1
periodic_all,10
# But only succeeds in replicating this to node 1
drop_pending_to,0,2
dispatch_all
# NB: This dispatch includes the response, so node 0 has advanced commit
# This response could arrive much later, since 0 and 1 do not need to communicate further
state_all
assert_commit_safety,0
# Disconnect node 0. Not strictly required, but simplifies work going forwards
disconnect_node,0
# Now node 1 calls an election, which it wins with the support of node 2
periodic_one,1,110
dispatch_all
# Node 1 may do work in this term, but crucially doesn't get as far as emitting a signature
replicate,2,saluton mondo
replicate,2,ah well nevertheless
state_all
# Node 2 calls an election, advancing to term 3
periodic_one,2,110
# Node 2's RequestVote reaches Node 1, causing a soft-rollback
dispatch_one,2
# Node 1 should retain the committed state!
state_all
assert_commit_safety,0
# Node 1 should not vote for node 2!
dispatch_one,1
assert_isnot_primary,2
# Because if they did, then node 2's AEs would break persistence!
periodic_all,10
dispatch_all
state_all
assert_commit_safety,0
# Now restore comms between 0 and 1, and disconnect 2
reconnect_node,0
disconnect_node,2
# Despite not being able to _commit_ the entries it holds, node 1
# should be able to win an election by advertising them!
periodic_one,1,110
dispatch_all
state_all
assert_is_primary,1
# Regardless of earlier disagreements, we can all agree now
reconnect_node,2
replicate,4,place trace checker
emit_signature,4
periodic_all,10
dispatch_all
periodic_all,10
dispatch_all
state_all
assert_state_sync

Просмотреть файл

@ -622,14 +622,13 @@ BecomeLeader(i) ==
\* all unsigned log entries of the previous leader.
\* See occurrence of last_committable_index() in raft.h::become_leader.
/\ log' = [log EXCEPT ![i] = SubSeq(log[i],1, MaxCommittableIndex(log[i]))]
/\ committableIndices' = [committableIndices EXCEPT ![i] = {}]
\* Reset our nextIndex to the end of the *new* log.
/\ nextIndex' = [nextIndex EXCEPT ![i] = [j \in Servers |-> Len(log'[i]) + 1]]
/\ matchIndex' = [matchIndex EXCEPT ![i] = [j \in Servers |-> 0]]
\* Shorten the configurations if the removed txs contained reconfigurations
/\ configurations' = [configurations EXCEPT ![i] = ConfigurationsToIndex(i, Len(log'[i]))]
/\ UNCHANGED <<reconfigurationCount, removedFromConfiguration, messageVars, currentTerm, votedFor,
candidateVars, commitIndex>>
candidateVars, commitIndex, committableIndices>>
\* Leader i receives a client request to add v to the log.
ClientRequest(i) ==
@ -708,6 +707,7 @@ AdvanceCommitIndex(i) ==
\* We want to get the smallest such index forward that is a signature
\* This index must be from the current term,
\* as explained by Figure 8 and Section 5.4.2 of https://raft.github.io/raft.pdf
\* See find_highest_possible_committable_index in raft.h
new_index == SelectInSubSeq(log[i], commitIndex[i]+1, Len(log[i]),
LAMBDA e : e.contentType = TypeSignature /\ e.term = currentTerm[i])
new_log ==