From 36de81af485a793e213a903899227e172cf6b04a Mon Sep 17 00:00:00 2001 From: Eddy Ashton Date: Wed, 11 May 2022 17:33:30 +0100 Subject: [PATCH] Describe indexing strategies in docs (#3832) --- doc/build_apps/api.rst | 15 ++++++++-- doc/build_apps/logging_cpp.rst | 37 ++++++++++++++++++++++-- include/ccf/historical_queries_adapter.h | 12 ++------ include/ccf/indexing/strategy.h | 18 +++++++++--- samples/apps/logging/logging.cpp | 4 +++ 5 files changed, 68 insertions(+), 18 deletions(-) diff --git a/doc/build_apps/api.rst b/doc/build_apps/api.rst index b29b6eb20..cc40d12a1 100644 --- a/doc/build_apps/api.rst +++ b/doc/build_apps/api.rst @@ -103,7 +103,7 @@ Supporting Types Historical Queries ------------------ -.. doxygenfunction:: ccf::historical::adapter_v2 +.. doxygenfunction:: ccf::historical::adapter_v3 :project: CCF .. doxygenclass:: ccf::historical::AbstractStateCache @@ -114,7 +114,18 @@ Historical Queries :project: CCF :members: -.. doxygenstruct:: ccf::Receipt +.. doxygenclass:: ccf::Receipt + :project: CCF + :members: + +Indexing +-------- + +.. doxygenclass:: ccf::indexing::Strategy + :project: CCF + :members: + +.. doxygenclass:: ccf::indexing::strategies::SeqnosByKey_Bucketed_Untyped :project: CCF :members: diff --git a/doc/build_apps/logging_cpp.rst b/doc/build_apps/logging_cpp.rst index d896610bb..05395be87 100644 --- a/doc/build_apps/logging_cpp.rst +++ b/doc/build_apps/logging_cpp.rst @@ -44,7 +44,7 @@ The Logging application simply has: :dedent: Application Endpoints ---------------------- +~~~~~~~~~~~~~~~~~~~~~ The implementation of :cpp:func:`ccfapp::make_user_endpoints()` should return a subclass of :cpp:class:`ccf::endpoints::EndpointRegistry`, containing the endpoints that constitute the app. @@ -182,7 +182,10 @@ This app can then define its own endpoints from a blank slate. 
If it wants to pr Historical Queries ~~~~~~~~~~~~~~~~~~ -This sample demonstrates how to define a historical query endpoint with the help of :cpp:func:`ccf::historical::adapter_v2`. +This sample demonstrates how to define a historical query endpoint with the help of :cpp:func:`ccf::historical::adapter_v3`. +Most endpoints operate over the *current* state of the KV, but these historical queries operate over *old* state, specifically over the writes made by a previous transaction. +The adapter handles extracting the target :term:`Transaction ID` from the user's request, and interacting with the :ref:`Historical Queries API <build_apps/api:Historical Queries>` to asynchronously fetch this entry from the ledger. +The deserialised and verified transaction is then presented to the handler code below, which performs reads and constructs a response like any other handler. The handler passed to the adapter is very similar to a read-only endpoint definition, but receives a read-only :cpp:struct:`ccf::historical::State` rather than a transaction. @@ -192,6 +195,36 @@ The handler passed to the adapter is very similar to a read-only endpoint defini :end-before: SNIPPET_END: get_historical :dedent: +Indexing +~~~~~~~~ + +The historical endpoint described above must process each target transaction on a specific node, asynchronously, before the result can be served. +For some use cases, in particular where the response is repeated often rather than dynamically constructed, this may be extremely inefficient. +Instead, we would prefer to pre-process all committed transactions and construct an efficient index of their contents, geared towards responding to a known pattern of user queries. + +For instance, if we want to list every value written to a specific key but know that writes are relatively rare, we could build an index of such writes. 
+When this historical query comes in, rather than fetching every transaction - to extract useful writes from a small fraction - the historical query endpoint can first ask the index which transactions should be processed and fetch only those. +If the response format is known, the index could even pre-construct the response itself. + +In CCF, this is achieved by implementing an indexing :cpp:type:`ccf::indexing::Strategy`. +This is constructed on each node, in-enclave, by processing every committed transaction in-order in the implementation of :cpp:func:`ccf::indexing::Strategy::handle_committed_transaction`. +The strategy can then return its aggregated results to the calling endpoint in whatever format is appropriate. +A :cpp:type:`ccf::indexing::Strategy` may offload partial results to disk to avoid infinite memory growth, via the automatically encrypted LFS (Large File Storage) system. +Since the indexing system and all the strategies it manages exist entirely within the enclave, this has the same trust guarantees as any other in-enclave code - users can trust that the results are accurate and complete, and the query may process private data. + +An example :cpp:type:`ccf::indexing::Strategy` is included in the logging app, to accelerate historical range queries. +This :cpp:type:`strategy <ccf::indexing::strategies::SeqnosByKey_Bucketed_Untyped>` stores the list of seqnos where every key is written to, offloading completed ranges to disk to cap the total memory usage. +This sample strategy is declared in the logging app as ``RecordsIndexingStrategy``, wrapped in a ``ccf::indexing::LazyStrategy``. +In the endpoint handler, rather than requesting every transaction in the requested range, the node relies on its index to fetch only the *interesting* transactions; those which write to the target key: + +.. literalinclude:: ../../samples/apps/logging/logging.cpp + :language: cpp + :start-after: SNIPPET_START: indexing_strategy_use + :end-before: SNIPPET_END: indexing_strategy_use + :dedent: + +See the sample app for full details of how this strategy is installed and used. 
+ Receipts ~~~~~~~~ diff --git a/include/ccf/historical_queries_adapter.h b/include/ccf/historical_queries_adapter.h index bd4d36ee1..29fefd3d5 100644 --- a/include/ccf/historical_queries_adapter.h +++ b/include/ccf/historical_queries_adapter.h @@ -72,16 +72,8 @@ namespace ccf::historical const CheckHistoricalTxStatus& available, const TxIDExtractor& extractor = txid_from_header); - /// @cond - // Doxygen cannot parse these declarations; some combination of a macro, - // attribute syntax, and namespaced types results in the following warning - // (treated as error): - // Found ';' while parsing initializer list! (doxygen could be confused by a - // macro call without semicolon) - // Use label-less cond to unconditionally exclude this block from parsing - // until the declarations are removed are removed. CCF_DEPRECATED( - "Will be removed in 2.0, switch to ccf::historical::adapter_v2") + "Will be removed in 2.0, switch to ccf::historical::adapter_v3") ccf::endpoints::EndpointFunction adapter_v1( const HandleHistoricalQuery& f, AbstractStateCache& state_cache, @@ -92,7 +84,7 @@ namespace ccf::historical // but the intention is to remove them come 2.0, and make all usage // explicitly versioned CCF_DEPRECATED( - "Will be removed in 2.0, switch to ccf::historical::adapter_v2") + "Will be removed in 2.0, switch to ccf::historical::adapter_v3") ccf::endpoints::EndpointFunction adapter( const HandleHistoricalQuery& f, AbstractStateCache& state_cache, diff --git a/include/ccf/indexing/strategy.h b/include/ccf/indexing/strategy.h index c44863944..f23020f71 100644 --- a/include/ccf/indexing/strategy.h +++ b/include/ccf/indexing/strategy.h @@ -10,6 +10,15 @@ namespace ccf::indexing { + /** The base class for all indexing strategies. + * + * Sub-class this and override handle_committed_transaction to implement your + * own indexing strategy. Create an instance of this on each node, and then + * install it with context.get_indexing_strategies().install_strategy(). 
It + * will then be given each committed transaction shortly after commit. You + * should build some aggregate/summary from these transactions, and return + * that to endpoint handlers in an efficient format. + */ class Strategy { const std::string name; @@ -23,15 +32,16 @@ namespace ccf::indexing return name; } - // Receives every committed transaction, in-order + /** Receives every committed transaction, in-order, shortly after commit + */ virtual void handle_committed_transaction( const ccf::TxID& tx_id, const kv::ReadOnlyStorePtr& store) = 0; virtual void tick() {} - // Returns next tx for which this index should be populated, or - // nullopt if it wants none. Allows indexes to be populated - // lazily on-demand, or out-of-order, or reset + /** Returns next tx for which this index should be populated, or + * nullopt if it wants none. Allows indexes to be populated + * lazily on-demand, or out-of-order, or reset */ virtual std::optional next_requested() = 0; }; diff --git a/samples/apps/logging/logging.cpp b/samples/apps/logging/logging.cpp index d093e3d9f..c42d460a5 100644 --- a/samples/apps/logging/logging.cpp +++ b/samples/apps/logging/logging.cpp @@ -35,8 +35,10 @@ namespace loggingapp static constexpr auto PUBLIC_FIRST_WRITES = "public:first_write_version"; static constexpr auto FIRST_WRITES = "first_write_version"; + // SNIPPET_START: indexing_strategy_definition using RecordsIndexingStrategy = ccf::indexing::LazyStrategy< ccf::indexing::strategies::SeqnosByKey_Bucketed>; + // SNIPPET_END: indexing_strategy_definition // SNIPPET_START: custom_identity struct CustomIdentity : public ccf::AuthnIdentity @@ -1132,9 +1134,11 @@ namespace loggingapp const auto range_end = std::min(to_seqno, range_begin + max_seqno_per_page); + // SNIPPET_START: indexing_strategy_use const auto interesting_seqnos = index_per_public_key->get_write_txs_in_range( id, range_begin, range_end); + // SNIPPET_END: indexing_strategy_use if (!interesting_seqnos.has_value()) { 
ctx.rpc_ctx->set_response_status(HTTP_STATUS_ACCEPTED);