From c02338d5226cc0a7643ccab22cdfe1a6e5c5e244 Mon Sep 17 00:00:00 2001 From: Alex Date: Mon, 7 Sep 2020 12:20:26 +0100 Subject: [PATCH] AFT: Forward requests correctly (#1579) --- CMakeLists.txt | 200 +++++++++++-------------- samples/apps/smallbank/smallbank.cmake | 104 ++++++------- src/consensus/aft/impl/execution.cpp | 1 + src/consensus/aft/raft.h | 1 + src/enclave/rpc_context.h | 1 + src/kv/store.h | 7 +- src/node/rpc/frontend.h | 16 +- src/node/rpc/test/frontend_test.cpp | 1 + tests/infra/consortium.py | 12 +- tests/infra/network.py | 23 +-- 10 files changed, 169 insertions(+), 197 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index f64fd4ec4..63e50dd5f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -45,7 +45,7 @@ configure_file( ) configure_file(${CCF_DIR}/python/setup.py.in ${CCF_DIR}/python/setup.py @ONLY) -set(CONSENSUSES raft) +set(CONSENSUSES raft pbft) option(BUILD_TESTS "Build tests" ON) option(BUILD_UNIT_TESTS "Build unit tests" ON) @@ -666,125 +666,99 @@ if(BUILD_TESTS) ) endif() + # Lua sample app (tx regulator) end to end test + add_e2e_test( + NAME lua_txregulator_test_raft + PYTHON_SCRIPT + ${CMAKE_CURRENT_SOURCE_DIR}/samples/apps/txregulator/tests/txregulatorclient.py + CONSENSUS raft + ADDITIONAL_ARGS + --app-script + ${CMAKE_CURRENT_SOURCE_DIR}/samples/apps/txregulator/app/txregulator.lua + --datafile + ${CMAKE_CURRENT_SOURCE_DIR}/samples/apps/txregulator/dataset/sample_data.csv + ) + + add_e2e_test( + NAME js_e2e_logging_raft + PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/e2e_logging.py + CONSENSUS raft + ADDITIONAL_ARGS --js-app-script + ${CMAKE_SOURCE_DIR}/src/apps/logging/logging_js.lua + ) + + add_e2e_test( + NAME e2e_scenario_raft + PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/e2e_scenarios.py + CONSENSUS raft + ADDITIONAL_ARGS --scenario + ${CMAKE_SOURCE_DIR}/tests/simple_logging_scenario.json + ) + + add_e2e_test( + NAME ws_logging_raft + PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/ws_scaffold.py + CONSENSUS raft + ) + + add_e2e_test( + NAME lua_e2e_logging_raft + PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/e2e_logging.py + CONSENSUS raft + ADDITIONAL_ARGS --app-script + ${CMAKE_SOURCE_DIR}/src/apps/logging/logging.lua + ) + + add_e2e_test( + NAME member_client_test_raft + PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/memberclient.py + CONSENSUS raft + ) + + if(NOT SAN) + add_e2e_test( + NAME connections_raft + PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/connections.py + CONSENSUS raft + ) + endif() + + add_e2e_test( + NAME receipts_test_raft + PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/receipts.py + CONSENSUS raft + ) + + if(TLS_TEST) + add_e2e_test( + NAME tlstest_raft + PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/tlstest.py + CONSENSUS raft + LABEL tlstest + ) + endif() + + add_e2e_test( + NAME schema_test_raft + PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/schema.py + CONSENSUS raft + ADDITIONAL_ARGS -p liblogging --schema-dir ${CMAKE_SOURCE_DIR}/doc/schemas + ) + + add_e2e_test( + NAME election_test_raft + PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/election.py + CONSENSUS raft + ADDITIONAL_ARGS "--raft-election-timeout" "4000" + ) + foreach(CONSENSUS ${CONSENSUSES}) - # Lua sample app (tx regulator) end to end test - add_e2e_test( - NAME lua_txregulator_test_${CONSENSUS} - PYTHON_SCRIPT - ${CMAKE_CURRENT_SOURCE_DIR}/samples/apps/txregulator/tests/txregulatorclient.py - CONSENSUS ${CONSENSUS} - ADDITIONAL_ARGS - --app-script - ${CMAKE_CURRENT_SOURCE_DIR}/samples/apps/txregulator/app/txregulator.lua - --datafile - ${CMAKE_CURRENT_SOURCE_DIR}/samples/apps/txregulator/dataset/sample_data.csv - ) - - add_e2e_test( - NAME js_e2e_logging_${CONSENSUS} - PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/e2e_logging.py - CONSENSUS ${CONSENSUS} - ADDITIONAL_ARGS --js-app-script - ${CMAKE_SOURCE_DIR}/src/apps/logging/logging_js.lua - ) - - add_e2e_test( - NAME e2e_scenario_${CONSENSUS} - PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/e2e_scenarios.py - CONSENSUS ${CONSENSUS} - ADDITIONAL_ARGS --scenario - ${CMAKE_SOURCE_DIR}/tests/simple_logging_scenario.json - ) - - add_e2e_test( - NAME ws_scaffold_${CONSENSUS} - PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/ws_scaffold.py - CONSENSUS ${CONSENSUS} - ) - - add_e2e_test( - NAME lua_e2e_logging_${CONSENSUS} - PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/e2e_logging.py - CONSENSUS ${CONSENSUS} - ADDITIONAL_ARGS --app-script - ${CMAKE_SOURCE_DIR}/src/apps/logging/logging.lua - ) - - add_e2e_test( - NAME member_client_test_${CONSENSUS} - PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/memberclient.py - CONSENSUS ${CONSENSUS} - ) - - if(NOT SAN) - add_e2e_test( - NAME connections_${CONSENSUS} - PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/connections.py - CONSENSUS ${CONSENSUS} - ) - endif() - - add_e2e_test( - NAME receipts_test_${CONSENSUS} - PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/receipts.py - CONSENSUS ${CONSENSUS} - ) - - if(TLS_TEST AND ${CONSENSUS} STREQUAL raft) - add_e2e_test( - NAME tlstest_${CONSENSUS} - PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/tlstest.py - CONSENSUS ${CONSENSUS} - LABEL tlstest - ) - endif() - - add_e2e_test( - NAME schema_test_${CONSENSUS} - PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/schema.py - CONSENSUS ${CONSENSUS} - ADDITIONAL_ARGS -p liblogging --schema-dir - ${CMAKE_SOURCE_DIR}/doc/schemas - ) - add_e2e_test( NAME cpp_e2e_logging_${CONSENSUS} PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/e2e_logging.py CURL_CLIENT TRUE CONSENSUS ${CONSENSUS} ) - - if(${CONSENSUS} STREQUAL pbft) - set(ELECTION_TIMEOUT_ARG "--pbft-view-change-timeout" "4000") - else() - set(ELECTION_TIMEOUT_ARG "--raft-election-timeout" "4000") - endif() - - add_e2e_test( - NAME election_test_${CONSENSUS} - PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/election.py - CONSENSUS ${CONSENSUS} - ADDITIONAL_ARGS ${ELECTION_TIMEOUT_ARG} - ) - - if(${CONSENSUS} STREQUAL pbft) - add_e2e_test( - NAME suspend_nodes_pbft - PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/suspend_nodes.py - CONSENSUS pbft - LABEL long_test - ADDITIONAL_ARGS --seed 42 - ) - - add_e2e_test( - NAME replay_new_view_pbft - PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/replay_new_view.py - CONSENSUS pbft - LABEL long_test - ADDITIONAL_ARGS --seed 42 - ) - endif() - endforeach() add_perf_test( diff --git a/samples/apps/smallbank/smallbank.cmake b/samples/apps/smallbank/smallbank.cmake index b92d304ff..1843df93e 100644 --- a/samples/apps/smallbank/smallbank.cmake +++ b/samples/apps/smallbank/smallbank.cmake @@ -64,68 +64,56 @@ get_verification_file( if(BUILD_TESTS) # Small Bank end to end and performance test - foreach(CONSENSUS ${CONSENSUSES}) - if(${CONSENSUS} STREQUAL pbft) - if(NOT CMAKE_BUILD_TYPE STREQUAL "Debug") - set(SMALL_BANK_ITERATIONS 50000) - else() - set(SMALL_BANK_ITERATIONS 2000) - endif() - else() - set(SMALL_BANK_ITERATIONS 200000) - endif() - get_verification_file(${SMALL_BANK_ITERATIONS} SMALL_BANK_VERIFICATION_FILE) + set(SMALL_BANK_ITERATIONS 200000) + get_verification_file(${SMALL_BANK_ITERATIONS} SMALL_BANK_VERIFICATION_FILE) - add_perf_test( - NAME small_bank_client_test_${CONSENSUS} - PYTHON_SCRIPT ${CMAKE_CURRENT_LIST_DIR}/tests/small_bank_client.py - CLIENT_BIN ./small_bank_client - VERIFICATION_FILE ${SMALL_BANK_VERIFICATION_FILE} - LABEL SB - CONSENSUS ${CONSENSUS} - ADDITIONAL_ARGS - --transactions ${SMALL_BANK_ITERATIONS} --max-writes-ahead 1000 - --metrics-file small_bank_${CONSENSUS}_metrics.json - ) + add_perf_test( + NAME small_bank_client_test_raft + PYTHON_SCRIPT ${CMAKE_CURRENT_LIST_DIR}/tests/small_bank_client.py + CLIENT_BIN ./small_bank_client + VERIFICATION_FILE ${SMALL_BANK_VERIFICATION_FILE} + LABEL SB + CONSENSUS raft + ADDITIONAL_ARGS --transactions ${SMALL_BANK_ITERATIONS} --max-writes-ahead + 1000 --metrics-file small_bank_raft_metrics.json + ) - add_perf_test( - NAME small_bank_client_ws_test_${CONSENSUS} - PYTHON_SCRIPT ${CMAKE_CURRENT_LIST_DIR}/tests/small_bank_client.py - CLIENT_BIN ./small_bank_client - VERIFICATION_FILE ${SMALL_BANK_VERIFICATION_FILE} - LABEL SB_WS - CONSENSUS ${CONSENSUS} - ADDITIONAL_ARGS - --transactions - ${SMALL_BANK_ITERATIONS} - --max-writes-ahead - 1000 - --metrics-file - small_bank_${CONSENSUS}_metrics.json - --use-websockets - ) + add_perf_test( + NAME small_bank_client_ws_test_raft + PYTHON_SCRIPT ${CMAKE_CURRENT_LIST_DIR}/tests/small_bank_client.py + CLIENT_BIN ./small_bank_client + VERIFICATION_FILE ${SMALL_BANK_VERIFICATION_FILE} + LABEL SB_WS + CONSENSUS raft + ADDITIONAL_ARGS + --transactions + ${SMALL_BANK_ITERATIONS} + --max-writes-ahead + 1000 + --metrics-file + small_bank_raft_metrics.json + --use-websockets + ) - add_perf_test( - NAME small_bank_sigs_client_test_${CONSENSUS} - PYTHON_SCRIPT ${CMAKE_CURRENT_LIST_DIR}/tests/small_bank_client.py - CLIENT_BIN ./small_bank_client - VERIFICATION_FILE ${SMALL_BANK_SIGNED_VERIFICATION_FILE} - LABEL "SB_sig" - CONSENSUS ${CONSENSUS} - ADDITIONAL_ARGS - --transactions - ${SMALL_BANK_SIGNED_ITERATIONS} - --max-writes-ahead - 1000 - --sign - --participants-curve - "secp256k1" - --metrics-file - small_bank_${CONSENSUS}_sigs_metrics.json - ) - - endforeach() + add_perf_test( + NAME small_bank_sigs_client_test_raft + PYTHON_SCRIPT ${CMAKE_CURRENT_LIST_DIR}/tests/small_bank_client.py + CLIENT_BIN ./small_bank_client + VERIFICATION_FILE ${SMALL_BANK_SIGNED_VERIFICATION_FILE} + LABEL "SB_sig" + CONSENSUS raft + ADDITIONAL_ARGS + --transactions + ${SMALL_BANK_SIGNED_ITERATIONS} + --max-writes-ahead + 1000 + --sign + --participants-curve + "secp256k1" + --metrics-file + small_bank_raft_sigs_metrics.json + ) # It is better to run performance tests with forwarding on different machines # (i.e. nodes and clients) diff --git a/src/consensus/aft/impl/execution.cpp b/src/consensus/aft/impl/execution.cpp index c61c13cc0..1719e1b14 100644 --- a/src/consensus/aft/impl/execution.cpp +++ b/src/consensus/aft/impl/execution.cpp @@ -61,6 +61,7 @@ namespace aft NoNode, ctx->pbft_raw.data(), ctx->pbft_raw.size()); ctx->is_create_request = is_create_request; + ctx->execute_on_node = true; ctx->set_apply_writes(true); enclave::RpcHandler::ProcessPbftResp rep = frontend->process_pbft(ctx); diff --git a/src/consensus/aft/raft.h b/src/consensus/aft/raft.h index b45df2c8c..f79df176d 100644 --- a/src/consensus/aft/raft.h +++ b/src/consensus/aft/raft.h @@ -319,6 +319,7 @@ namespace aft { for (auto& [index, data, globally_committable] : entries) { + state->last_idx = index; ledger->put_entry(*data, globally_committable, false); } return true; diff --git a/src/enclave/rpc_context.h b/src/enclave/rpc_context.h index 68066514a..3a337a38c 100644 --- a/src/enclave/rpc_context.h +++ b/src/enclave/rpc_context.h @@ -117,6 +117,7 @@ namespace enclave std::vector pbft_raw = {}; bool is_create_request = false; + bool execute_on_node = false; RpcContext(std::shared_ptr s) : session(s) {} diff --git a/src/kv/store.h b/src/kv/store.h index 05c098bc2..cb0760766 100644 --- a/src/kv/store.h +++ b/src/kv/store.h @@ -750,7 +750,12 @@ namespace kv { // we have deserialised an entry that didn't belong to the pbft // requests, nor the pbft new views, nor the pbft pre prepares table - return DeserialiseSuccess::FAILED; + + // NOTE: we currently do not support signature transactions and said + // support will be added in the near future + LOG_FAIL_FMT("Failed to deserialise"); + LOG_DEBUG_FMT( + "Unexpected contents in pbft transaction size {}", views.size()); } } diff --git a/src/node/rpc/frontend.h b/src/node/rpc/frontend.h index 7fd68b408..42e6c7eb8 100644 --- a/src/node/rpc/frontend.h +++ b/src/node/rpc/frontend.h @@ -331,7 +331,10 @@ namespace ccf update_history(); - if (!is_primary && consensus->type() == ConsensusType::RAFT) + if ((!is_primary && + (consensus->type() == ConsensusType::RAFT || + (consensus->type() != ConsensusType::RAFT && + !ctx->execute_on_node)))) { switch (endpoint->forwarding_required) { @@ -342,8 +345,13 @@ namespace ccf case ForwardingRequired::Sometimes: { - if (ctx->session->is_forwarding) + if ( + (ctx->session->is_forwarding && + consensus->type() == ConsensusType::RAFT) || + (consensus->type() != ConsensusType::RAFT && + !ctx->execute_on_node)) { + ctx->session->is_forwarding = true; return forward_or_redirect_json(ctx, endpoint, caller_id); } break; @@ -531,7 +539,9 @@ namespace ccf auto caller_id = endpoints.get_caller_id(tx, ctx->session->caller_cert); - if (consensus != nullptr && consensus->type() == ConsensusType::PBFT) + if ( + consensus != nullptr && consensus->type() == ConsensusType::PBFT && + (ctx->execute_on_node || consensus->is_primary())) { auto rep = process_if_local_node_rpc(ctx, tx, caller_id); if (rep.has_value()) diff --git a/src/node/rpc/test/frontend_test.cpp b/src/node/rpc/test/frontend_test.cpp index c1d325dfe..db3d05236 100644 --- a/src/node/rpc/test/frontend_test.cpp +++ b/src/node/rpc/test/frontend_test.cpp @@ -505,6 +505,7 @@ TEST_CASE("process_pbft") auto session = std::make_shared( enclave::InvalidSessionId, user_id, user_caller_der); auto ctx = enclave::make_rpc_context(session, request.raw); + ctx->execute_on_node = true; frontend.process_pbft(ctx); kv::Tx tx; diff --git a/tests/infra/consortium.py b/tests/infra/consortium.py index 1ecae173d..5cfe941a7 100644 --- a/tests/infra/consortium.py +++ b/tests/infra/consortium.py @@ -256,7 +256,7 @@ class Consortium: self.vote_using_majority(remote_node, proposal) member_to_retire.status_code = infra.member.MemberStatus.RETIRED - def open_network(self, remote_node, pbft_open=False): + def open_network(self, remote_node): """ Assuming a network in state OPENING, this functions creates a new proposal and make members vote to transition the network to state @@ -265,10 +265,8 @@ class Consortium: proposal_body, careful_vote = self.make_proposal("open_network") proposal = self.get_any_active_member().propose(remote_node, proposal_body) proposal.vote_for = careful_vote - self.vote_using_majority( - remote_node, proposal, wait_for_global_commit=(not pbft_open) - ) - self.check_for_service(remote_node, infra.network.ServiceStatus.OPEN, pbft_open) + self.vote_using_majority(remote_node, proposal, wait_for_global_commit=True) + self.check_for_service(remote_node, infra.network.ServiceStatus.OPEN) def rekey_ledger(self, remote_node): proposal_body, careful_vote = self.make_proposal("rekey_ledger") @@ -367,7 +365,7 @@ class Consortium: proposal.vote_for = careful_vote return self.vote_using_majority(remote_node, proposal) - def check_for_service(self, remote_node, status, pbft_open=False): + def check_for_service(self, remote_node, status): """ Check via the member frontend of the given node that the certificate associated with current CCF service signing key has been recorded in @@ -397,7 +395,7 @@ class Consortium: return service """ }, - timeout=(30 if pbft_open else 3), + timeout=3, ) current_status = r.body["status"] current_cert = r.body["cert"] diff --git a/tests/infra/network.py b/tests/infra/network.py index cccdcc01d..304c7f0dd 100644 --- a/tests/infra/network.py +++ b/tests/infra/network.py @@ -328,8 +328,7 @@ class Network: self.create_users(initial_users, args.participants_curve) primary = self._start_all_nodes(args) - if args.consensus != "pbft": - self.wait_for_all_nodes_to_catch_up(primary) + self.wait_for_all_nodes_to_catch_up(primary) LOG.success("All nodes joined network") self.consortium.activate(primary) @@ -351,9 +350,7 @@ class Network: self.consortium.add_users(primary, initial_users) LOG.info("Initial set of users added") - self.consortium.open_network( - remote_node=primary, pbft_open=(args.consensus == "pbft") - ) + self.consortium.open_network(remote_node=primary) self.status = ServiceStatus.OPEN LOG.success("***** Network is now open *****") @@ -405,9 +402,7 @@ class Network: node, "partOfNetwork", timeout=args.ledger_recovery_timeout ) - self.consortium.check_for_service( - primary, ServiceStatus.OPEN, pbft_open=(args.consensus == "pbft") - ) + self.consortium.check_for_service(primary, ServiceStatus.OPEN) LOG.success("***** Recovered network is now open *****") def store_current_network_encryption_key(self): @@ -498,19 +493,17 @@ class Network: try: if self.status is ServiceStatus.OPEN: self.consortium.trust_node(primary, new_node.node_id) - if args.consensus != "pbft": - # Here, quote verification has already been run when the node - # was added as pending. Only wait for the join timer for the - # joining node to retrieve network secrets. - new_node.wait_for_node_to_join(timeout=ceil(args.join_timer * 2 / 1000)) + # Here, quote verification has already been run when the node + # was added as pending. Only wait for the join timer for the + # joining node to retrieve network secrets. + new_node.wait_for_node_to_join(timeout=ceil(args.join_timer * 2 / 1000)) except (ValueError, TimeoutError): LOG.error(f"New trusted node {new_node.node_id} failed to join the network") new_node.stop() raise new_node.network_state = infra.node.NodeNetworkState.joined - if args.consensus != "pbft": - self.wait_for_all_nodes_to_catch_up(primary) + self.wait_for_all_nodes_to_catch_up(primary) return new_node