зеркало из https://github.com/microsoft/CCF.git
[release/2.x] Cherry pick: Historical queries: Re-request seqnos if host fails to respond (#4976) (#4982)
This commit is contained in:
Родитель
75d81934c5
Коммит
60b3f8eba2
|
@ -5,6 +5,15 @@ All notable changes to this project will be documented in this file.
|
|||
The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/)
|
||||
and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html).
|
||||
|
||||
## [2.0.15]
|
||||
|
||||
[2.0.15]: https://github.com/microsoft/CCF/releases/tag/ccf-2.0.15
|
||||
|
||||
### Changed
|
||||
|
||||
- Additional logging of historical query flow in `UNSAFE` builds.
|
||||
- Historical query system will re-request entries if the host fails to provide them within a fixed time.
|
||||
|
||||
## [2.0.14]
|
||||
|
||||
[2.0.14]: https://github.com/microsoft/CCF/releases/tag/ccf-2.0.14
|
||||
|
|
|
@ -10,7 +10,7 @@ option(UNSAFE_VERSION "Produce build with unsafe logging levels" OFF)
|
|||
set(CCF_PROJECT "ccf")
|
||||
if(UNSAFE_VERSION)
|
||||
set(CCF_PROJECT "${CCF_PROJECT}_unsafe")
|
||||
add_compile_definitions(UNSAFE_VERSION)
|
||||
add_compile_definitions(UNSAFE_VERSION ENABLE_HISTORICAL_VERBOSE_LOGGING)
|
||||
file(WRITE ${CMAKE_BINARY_DIR}/UNSAFE "UNSAFE")
|
||||
install(FILES ${CMAKE_BINARY_DIR}/UNSAFE DESTINATION share)
|
||||
endif()
|
||||
|
|
|
@ -89,7 +89,7 @@ fi
|
|||
|
||||
source scripts/env/bin/activate
|
||||
pip install -U pip
|
||||
pip install -U wheel black pylint mypy 1>/dev/null
|
||||
pip install -U wheel black==22.12.0 pylint==2.15.10 mypy 1>/dev/null
|
||||
|
||||
echo "$CHECK_DELIMITER"
|
||||
echo "-- Python format"
|
||||
|
|
|
@ -62,6 +62,8 @@ FMT_END_NAMESPACE
|
|||
|
||||
namespace ccf::historical
|
||||
{
|
||||
static constexpr auto slow_fetch_threshold = std::chrono::milliseconds(1000);
|
||||
|
||||
static std::optional<ccf::PrimarySignature> get_signature(
|
||||
const kv::StorePtr& sig_store)
|
||||
{
|
||||
|
@ -293,17 +295,6 @@ namespace ccf::historical
|
|||
return SeqNoCollection(newly_requested.begin(), newly_requested.end());
|
||||
}
|
||||
|
||||
enum class PopulateReceiptsResult
|
||||
{
|
||||
// Common result. The new seqno may have added receipts for some entries
|
||||
Continue,
|
||||
|
||||
// Occasional result. The new seqno was at the end of the sequence (or
|
||||
// an attempt at retrieving a trailing supporting signature), but we
|
||||
// still have receiptless entries, so attempt to fetch the next
|
||||
FetchNext,
|
||||
};
|
||||
|
||||
std::optional<ccf::SeqNo> populate_receipts(ccf::SeqNo new_seqno)
|
||||
{
|
||||
HISTORICAL_LOG(
|
||||
|
@ -508,7 +499,11 @@ namespace ccf::historical
|
|||
// Track all things currently requested by external callers
|
||||
std::map<CompoundHandle, Request> requests;
|
||||
|
||||
std::set<ccf::SeqNo> pending_fetches;
|
||||
// Store each seqno that is currently being fetched by the host, to avoid
|
||||
// spamming it with duplicate requests, and how long it has been fetched
|
||||
// for. If this gap gets too large, we will log a warning and allow it to be
|
||||
// re-fetched
|
||||
std::unordered_map<ccf::SeqNo, std::chrono::milliseconds> pending_fetches;
|
||||
|
||||
ExpiryDuration default_expiry_duration = std::chrono::seconds(1800);
|
||||
|
||||
|
@ -522,22 +517,33 @@ namespace ccf::historical
|
|||
std::optional<ccf::SeqNo> unfetched_from = std::nullopt;
|
||||
std::optional<ccf::SeqNo> unfetched_to = std::nullopt;
|
||||
|
||||
LOG_TRACE_FMT("fetch_entries_range({}, {})", from, to);
|
||||
|
||||
for (auto seqno = from; seqno <= to; ++seqno)
|
||||
{
|
||||
const auto ib = pending_fetches.insert(seqno);
|
||||
if (ib.second)
|
||||
const auto ib = pending_fetches.try_emplace(seqno, 0);
|
||||
if (
|
||||
// Newly requested fetch
|
||||
ib.second ||
|
||||
// Fetch in-progress, but looks slow enough to retry
|
||||
ib.first->second >= slow_fetch_threshold)
|
||||
{
|
||||
if (!unfetched_from.has_value())
|
||||
{
|
||||
unfetched_from = seqno;
|
||||
}
|
||||
unfetched_to = seqno;
|
||||
ib.first->second = std::chrono::milliseconds(0);
|
||||
}
|
||||
}
|
||||
|
||||
if (unfetched_from.has_value())
|
||||
{
|
||||
// Newly requested seqnos
|
||||
LOG_TRACE_FMT(
|
||||
"Writing to ringbuffer ledger_get_range({}, {})",
|
||||
unfetched_from.value(),
|
||||
unfetched_to.value());
|
||||
RINGBUFFER_WRITE_MESSAGE(
|
||||
consensus::ledger_get_range,
|
||||
to_host,
|
||||
|
@ -1101,6 +1107,8 @@ namespace ccf::historical
|
|||
const uint8_t* data,
|
||||
size_t size)
|
||||
{
|
||||
LOG_TRACE_FMT("handle_ledger_entries({}, {})", from_seqno, to_seqno);
|
||||
|
||||
auto seqno = from_seqno;
|
||||
bool all_accepted = true;
|
||||
while (size > 0)
|
||||
|
@ -1129,6 +1137,8 @@ namespace ccf::historical
|
|||
{
|
||||
std::lock_guard<ccf::Mutex> guard(requests_lock);
|
||||
|
||||
LOG_TRACE_FMT("handle_no_entry_range({}, {})", from_seqno, to_seqno);
|
||||
|
||||
for (auto seqno = from_seqno; seqno <= to_seqno; ++seqno)
|
||||
{
|
||||
// The host failed or refused to give this entry. Currently just
|
||||
|
@ -1231,6 +1241,20 @@ namespace ccf::historical
|
|||
++it;
|
||||
}
|
||||
}
|
||||
|
||||
for (auto& [seqno, time_since_sent] : pending_fetches)
|
||||
{
|
||||
const auto time_after = time_since_sent + elapsed_ms;
|
||||
// Log once, when we cross the time threshold
|
||||
if (
|
||||
time_since_sent < slow_fetch_threshold &&
|
||||
time_after >= slow_fetch_threshold)
|
||||
{
|
||||
LOG_FAIL_FMT(
|
||||
"Fetch for seqno {} is taking an unusually long time", seqno);
|
||||
}
|
||||
time_since_sent = time_after;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
|
||||
#define OVERRIDE_MAX_HISTORY_LEN 4
|
||||
// Uncomment this to aid debugging
|
||||
//#define ENABLE_HISTORICAL_VERBOSE_LOGGING
|
||||
// #define ENABLE_HISTORICAL_VERBOSE_LOGGING
|
||||
|
||||
#include "node/historical_queries.h"
|
||||
|
||||
|
@ -260,6 +260,7 @@ TEST_CASE("StateCache point queries")
|
|||
static const ccf::historical::RequestHandle default_handle = 0;
|
||||
static const ccf::historical::RequestHandle low_handle = 1;
|
||||
static const ccf::historical::RequestHandle high_handle = 2;
|
||||
static const ccf::historical::RequestHandle retry_handle = 3;
|
||||
|
||||
{
|
||||
INFO(
|
||||
|
@ -274,6 +275,48 @@ TEST_CASE("StateCache point queries")
|
|||
REQUIRE(cache.get_state_at(low_handle, low_seqno) == nullptr);
|
||||
}
|
||||
|
||||
{
|
||||
INFO(
|
||||
"If too much time passes without providing request seqnos, failures are "
|
||||
"logged");
|
||||
|
||||
size_t fails_count = 0;
|
||||
|
||||
struct FailCountingStub : public logger::AbstractLogger
|
||||
{
|
||||
size_t& fails_count;
|
||||
|
||||
FailCountingStub(size_t& fc) : fails_count(fc) {}
|
||||
|
||||
void write(
|
||||
const logger::LogLine& ll,
|
||||
const std::optional<double>& enclave_offset = std::nullopt) override
|
||||
{
|
||||
if (ll.log_level == logger::FAIL)
|
||||
{
|
||||
++fails_count;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
logger::config::loggers().emplace_back(
|
||||
std::make_unique<FailCountingStub>(fails_count));
|
||||
|
||||
// Less time than threshold has elapsed => No failed logging
|
||||
cache.tick(ccf::historical::slow_fetch_threshold / 2);
|
||||
REQUIRE(fails_count == 0);
|
||||
|
||||
// Threshold elapsed => Failed logging
|
||||
cache.tick(ccf::historical::slow_fetch_threshold / 2);
|
||||
REQUIRE(fails_count == 3);
|
||||
|
||||
// Much more time passes => No more logging
|
||||
cache.tick(ccf::historical::slow_fetch_threshold * 4);
|
||||
REQUIRE(fails_count == 3);
|
||||
|
||||
logger::config::loggers().clear();
|
||||
}
|
||||
|
||||
{
|
||||
INFO("The host sees requests for these indices");
|
||||
REQUIRE(!stub_writer->writes.empty());
|
||||
|
@ -291,11 +334,37 @@ TEST_CASE("StateCache point queries")
|
|||
auto& to_seqno = to_seqno_;
|
||||
REQUIRE(purpose == consensus::LedgerRequestPurpose::HistoricalQuery);
|
||||
REQUIRE(from_seqno == to_seqno);
|
||||
actual.insert(from_seqno);
|
||||
// Despite multiple calls for each seqno, the host only sees a single
|
||||
// request for each
|
||||
REQUIRE(actual.insert(from_seqno).second);
|
||||
}
|
||||
stub_writer->writes.clear();
|
||||
REQUIRE(actual == expected);
|
||||
}
|
||||
|
||||
{
|
||||
INFO("Once sufficient time has passed, re-requesting will re-fetch");
|
||||
|
||||
cache.tick(ccf::historical::slow_fetch_threshold);
|
||||
REQUIRE(cache.get_state_at(default_handle, low_seqno) == nullptr);
|
||||
REQUIRE(cache.get_state_at(default_handle, unsigned_seqno) == nullptr);
|
||||
|
||||
REQUIRE(!stub_writer->writes.empty());
|
||||
const auto& write = stub_writer->writes[0];
|
||||
const uint8_t* data = write.contents.data();
|
||||
size_t size = write.contents.size();
|
||||
REQUIRE(write.m == consensus::ledger_get_range);
|
||||
auto [from_seqno_, to_seqno_, purpose_] =
|
||||
ringbuffer::read_message<consensus::ledger_get_range>(data, size);
|
||||
auto& purpose = purpose_;
|
||||
auto& from_seqno = from_seqno_;
|
||||
auto& to_seqno = to_seqno_;
|
||||
REQUIRE(purpose == consensus::LedgerRequestPurpose::HistoricalQuery);
|
||||
REQUIRE(from_seqno == to_seqno);
|
||||
REQUIRE(from_seqno == low_seqno);
|
||||
stub_writer->writes.clear();
|
||||
}
|
||||
|
||||
auto provide_ledger_entry = [&](size_t i) {
|
||||
bool accepted = cache.handle_ledger_entry(i, ledger.at(i));
|
||||
return accepted;
|
||||
|
|
Загрузка…
Ссылка в новой задаче