Mirror of https://github.com/microsoft/CCF.git
Re-enable code update and add node tests (#362)
Parent: 0ce7460cfa
Commit: 30c1c230fb
@@ -283,22 +283,20 @@ if(BUILD_TESTS)
--oesign ${OESIGN}
)

# TODO: Re-enable these tests
# add_e2e_test(
# NAME add_node_test
# PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/addnode.py
# )
add_e2e_test(
NAME add_node_test
PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/addnode.py
)

## Code update test
# add_e2e_test(
# NAME code_update_test
# PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/code_update.py
# ADDITIONAL_ARGS
# --oesign ${OESIGN}
# --oeconfpath ${CMAKE_CURRENT_SOURCE_DIR}/src/apps/logging/oe_sign.conf
# --oesignkeypath ${CMAKE_CURRENT_SOURCE_DIR}/src/apps/sample_key.pem
# --election-timeout 1500
# )
add_e2e_test(
NAME code_update_test
PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/code_update.py
ADDITIONAL_ARGS
--oesign ${OESIGN}
--oeconfpath ${CMAKE_CURRENT_SOURCE_DIR}/src/apps/logging/oe_sign.conf
--oesignkeypath ${CMAKE_CURRENT_SOURCE_DIR}/src/apps/sample_key.pem
--election-timeout 1500
)
endif()

add_e2e_test(

@@ -313,8 +311,6 @@ if(BUILD_TESTS)
--scenario ${CMAKE_SOURCE_DIR}/tests/simple_logging_scenario.json
)

# TODO: These tests make use of dynamic node configuration that is not
# yet supported by PBFT
add_e2e_test(
NAME election_tests
PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/election.py
@@ -20,7 +20,7 @@ For example, ``member1`` may submit a proposal to add a new member (``member4``)

.. code-block:: bash

$ memberclient --server-address 127.83.203.69:55526 --cert member1_cert.pem --privk member1_privk.pem --ca networkcert.pem add_member --member-cert member4_cert.pem
$ memberclient --rpc-address 127.83.203.69:55526 --cert member1_cert.pem --privk member1_privk.pem --ca networkcert.pem add_member --member-cert member4_cert.pem
{"commit":100,"global_commit":99,"id":0,"jsonrpc":"2.0","result":{"completed":false,"id":1},"term":2}

In this case, a new proposal with id ``1`` has successfully been created and the proposer member has automatically accepted it. Other members can then accept or reject the proposal:

@@ -30,12 +30,12 @@ In this case, a new proposal with id ``1`` has successfully been created and the
// Proposal 1 is already created by member 1 (votes: 1/3)

// Member 2 rejects the proposal (votes: 1/3)
$ memberclient --server-address 127.83.203.69:55526 --cert member2_cert.pem --privk member2_privk.pem --ca networkcert.pem vote --reject --proposal-id 1
$ memberclient --rpc-address 127.83.203.69:55526 --cert member2_cert.pem --privk member2_privk.pem --ca networkcert.pem vote --reject --proposal-id 1
{"commit":104,"global_commit":103,"id":0,"jsonrpc":"2.0","result":false,"term":2}

// Member 3 accepts the proposal (votes: 2/3)
// As a quorum of members have accepted the proposal, member4 is added to the consortium
$ memberclient --server-address 127.83.203.69:55526 --cert member3_cert.pem --privk member3_privk.pem --ca networkcert.pem vote --accept --proposal-id 1
$ memberclient --rpc-address 127.83.203.69:55526 --cert member3_cert.pem --privk member3_privk.pem --ca networkcert.pem vote --accept --proposal-id 1
{"commit":106,"global_commit":105,"id":0,"jsonrpc":"2.0","result":true,"term":2}

As soon as ``member3`` accepts the proposal, a quorum (2 out of 3) of members has been reached and the proposal completes, successfully adding ``member4``.

@@ -44,7 +44,7 @@ As soon as ``member3`` accepts the proposal, a quorum (2 out of 3) of members ha

.. code-block:: bash

$ memberclient --server-address 127.83.203.69:55526 --cert member4_cert.pem --privk member4_privk.pem --ca networkcert.pem ack
$ memberclient --rpc-address 127.83.203.69:55526 --cert member4_cert.pem --privk member4_privk.pem --ca networkcert.pem ack
{"commit":108,"global_commit":107,"id":2,"jsonrpc":"2.0","result":true,"term":2}

@@ -55,7 +55,7 @@ The details of pending proposals, including the proposer member ID, proposal scr

.. code-block:: bash

$ memberclient --server-address 127.83.203.69:55526 --cert member1_cert.pem --privk member1_privk.pem --ca networkcert.pem proposal_display
$ memberclient --rpc-address 127.83.203.69:55526 --cert member1_cert.pem --privk member1_privk.pem --ca networkcert.pem proposal_display
{
"1": {
"parameter": [...],

@@ -89,7 +89,7 @@ At any stage during the voting process and before the proposal is completed, the

.. code-block:: bash

$ memberclient --server-address 127.83.203.69:55526 --cert member1_cert.pem --privk member1_privk.pem --ca networkcert.pem withdraw --proposal-id 0
$ memberclient --rpc-address 127.83.203.69:55526 --cert member1_cert.pem --privk member1_privk.pem --ca networkcert.pem withdraw --proposal-id 0
{"commit":110,"global_commit":109,"id":0,"jsonrpc":"2.0","result":true,"term":4}

This means future votes will be ignored, and the proposal will never be accepted. However it will remain visible as a proposal so members can easily audit historic proposals.
@@ -1,51 +0,0 @@
{
"$schema": "http://json-schema.org/draft-07/schema#",
"properties": {
"cert": {
"items": {
"maximum": 255,
"minimum": 0,
"type": "number"
},
"type": "array"
},
"host": {
"type": "string"
},
"nodeport": {
"type": "string"
},
"pubhost": {
"type": "string"
},
"quote": {
"items": {
"maximum": 255,
"minimum": 0,
"type": "number"
},
"type": "array"
},
"rpcport": {
"type": "string"
},
"status": {
"enum": [
"PENDING",
"TRUSTED",
"RETIRED"
]
}
},
"required": [
"host",
"pubhost",
"nodeport",
"rpcport",
"cert",
"quote",
"status"
],
"title": "add_node/params",
"type": "object"
}
@@ -1,15 +0,0 @@
{
"$schema": "http://json-schema.org/draft-07/schema#",
"properties": {
"id": {
"maximum": 18446744073709551615,
"minimum": 0,
"type": "number"
}
},
"required": [
"id"
],
"title": "add_node/result",
"type": "object"
}
@@ -89,11 +89,15 @@ The following diagram summarises the steps required to bootstrap a CCF network:
Updating enclave code
~~~~~~~~~~~~~~~~~~~~~

.. warning:: Further details required.

For new nodes to be able to join the network, the version of the code they run (as specified by the ``--enclave-file``) should be first trusted by the consortium of members.

If the version of the code being executed needs to be updated (for example, to support additional JSON-RPC endpoints), memebrs can create a ``new_code`` proposal, specifying the new code version. Once the proposal has been accepted, nodes running the new code are authorised join the network. This allows stopping nodes running older versions of the code.
If the version of the code being executed needs to be updated (for example, to support additional JSON-RPC endpoints), members can create a ``new_code`` proposal, specifying the new code version (e.g. ``3175971c02d00c1a8f9dd23ca89e64955c5caa94e24f4a3a0579dcfb2e6aebf9``):

.. code-block:: bash

memberclient --cert member_cert --privk member_privk --rpc-address node_ip:node_port --ca network_cert add_code --new-code-id code_version

Once the proposal has been accepted, nodes running the new code are authorised join the network. This allows stopping nodes running older versions of the code.

.. note:: It is important to keep the code compatible with the previous version, since there will be a point in time in which the new code is running on at least one node, while the other version is running on a different node.

@@ -127,4 +131,4 @@ There is an option to further generate machine-readable logs for monitoring. To

.. rubric:: Footnotes

.. [#remote_attestation] When a new node joins an existing network, the network performs the remote attestation protocol by verifying the joining node's quote. It also checks that the version of the code running by the joining node known is trusted by the consortium.
.. [#remote_attestation] When a new node joins an existing network, the network performs the remote attestation protocol by verifying the joining node's quote. It also checks that the version of the code running by the joining node is trusted by the consortium.
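The code-update flow documented above can be sketched end to end from the command line. The example below is illustrative only: it assumes the ``memberclient`` flags shown elsewhere in this diff (``add_code``, ``vote``), that ``oesign dump`` exposes the new enclave's ``mrenclave`` measurement as the code version, and uses placeholder file names and addresses.

.. code-block:: bash

    # Derive the code version (MRENCLAVE) of the new enclave binary (hypothetical path)
    new_code_id=$(oesign dump -e new_logging_enclave.so.signed | grep mrenclave | cut -d '=' -f 2)

    # One member proposes trusting the new code version
    memberclient --rpc-address node_ip:node_port --cert member1_cert.pem --privk member1_privk.pem --ca networkcert.pem add_code --new-code-id $new_code_id

    # Other members vote; the proposal id comes from the response above
    memberclient --rpc-address node_ip:node_port --cert member2_cert.pem --privk member2_privk.pem --ca networkcert.pem vote --accept --proposal-id 1

Once a quorum of members has accepted the proposal, nodes started with the new ``--enclave-file`` are allowed to join, and nodes running the old code can be retired.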
@@ -157,24 +157,6 @@ void submit_retire_node(RpcTlsClient& tls_connection, NodeId node_id)
cout << response.dump() << endl;
}

NodeId submit_add_node(RpcTlsClient& tls_connection, NodeInfo& node_info)
{
const auto response =
json::from_msgpack(tls_connection.call("add_node", node_info));

cout << response.dump() << endl;

auto result = response.find("result");
if (result == response.end())
return INVALID_NODE_ID;

auto ret_id = result->find("id");
if (ret_id == result->end())
return INVALID_NODE_ID;

return *ret_id;
}

void submit_accept_recovery(
RpcTlsClient& tls_connection, const string& sealed_secrets_file)
{

@@ -323,13 +305,6 @@ int main(int argc, char** argv)
auto ack =
app.add_subcommand("ack", "Acknowledge self added into the network");

std::string nodes_file;
auto add_node = app.add_subcommand("add_node", "Add a node");
add_node
->add_option(
"--nodes-to-add", nodes_file, "The file containing the nodes to be added")
->required(true);

std::string new_code_id;
auto add_code = app.add_subcommand("add_code", "Support executing new code");
add_code

@@ -415,22 +390,6 @@ int main(int argc, char** argv)
add_new(*tls_connection, user_cert_file, add_user_proposal);
}

if (*add_node)
{
const auto j_nodes = files::slurp_json(nodes_file);

if (!j_nodes.is_array())
{
throw logic_error("Expected " + nodes_file + " to contain array");
}

for (auto node : j_nodes)
{
NodeInfo node_info = node;
submit_add_node(*tls_connection, node_info);
}
}

if (*add_code)
{
submit_accept_code(*tls_connection, new_code_id);
@@ -117,7 +117,7 @@ namespace enclave
::memcpy(quote, r.first.quote.data(), r.first.quote.size());
*quote_len = r.first.quote.size();

if (start_type == StartType::Start || start_type == StartType::Recover)
if (start_type == StartType::New || start_type == StartType::Recover)
{
// When starting a node in start or recover modes, fresh network secrets
// are created and the associated certificate can be passed to the host
@@ -4,7 +4,7 @@

enum StartType
{
Start = 1,
New = 1,
Join = 2,
Recover = 3
};
@@ -325,7 +325,7 @@ int main(int argc, char** argv)
if (*start)
{
LOG_INFO_FMT("Creating new node - new network");
start_type = StartType::Start;
start_type = StartType::New;
ccf_config.genesis.member_certs = files::slurp_certs(member_cert_file);
ccf_config.genesis.user_certs = files::slurp_certs(user_cert_file);
ccf_config.genesis.gov_script = files::slurp_string(gov_script);
@@ -208,7 +208,7 @@ namespace ccf

switch (args.start_type)
{
case StartType::Start:
case StartType::New:
{
GenesisGenerator g(network);
g.init_values();
@@ -142,20 +142,4 @@ namespace ccf
int64_t version; // Current version of the network secrets
};
};

// TODO: It seems that we still use this for add_node in memberfrontend.h
struct JoinNetwork
{
struct In
{
std::vector<uint8_t> network_cert;
std::string hostname;
std::string service;
};

struct Out
{
NodeId id;
};
};
}
@@ -41,8 +41,6 @@ namespace ccf
static constexpr auto PROPOSE = "propose";
static constexpr auto WITHDRAW = "withdraw";

static constexpr auto ADD_NODE = "add_node";

static constexpr auto ACK = "ack";
static constexpr auto UPDATE_ACK_NONCE = "updateAckNonce";
};
@@ -435,53 +435,6 @@ namespace ccf
};
install_with_auto_schema<void, bool>(
MemberProcs::UPDATE_ACK_NONCE, update_ack_nonce, Write);

// Add a new node
auto add_node = [this](RequestArgs& args) {
NodeInfo new_node = args.params;
#ifdef GET_QUOTE
QuoteVerificationResult verify_result = QuoteVerifier::verify_quote(
args.tx, this->network, new_node.quote, new_node.cert);
if (verify_result != QuoteVerificationResult::VERIFIED)
return QuoteVerifier::quote_verification_error_to_json(verify_result);
#endif
auto nodes_view = args.tx.get_view(this->network.nodes);
NodeId duplicate_node_id = NoNode;
nodes_view->foreach([&new_node, &duplicate_node_id](
const NodeId& nid, const NodeInfo& ni) {
if (
new_node.rpcport == ni.rpcport && new_node.host == ni.host &&
ni.status != NodeStatus::RETIRED)
{
duplicate_node_id = nid;
return false;
}
return true;
});
if (duplicate_node_id != NoNode)
return jsonrpc::error(
jsonrpc::StandardErrorCodes::INVALID_PARAMS,
fmt::format(
"A node with the same host {} and port {} already exists (node "
"id: {})",
new_node.host,
new_node.rpcport,
duplicate_node_id));
const auto node_id = get_next_id(
args.tx.get_view(this->network.values), ValueIds::NEXT_NODE_ID);
new_node.status = NodeStatus::PENDING;
nodes_view->put(node_id, new_node);

// TODO: We don't use verifier here, is it needed? Perhaps it is
// canonicalising the cert?
auto verifier = tls::make_verifier(new_node.cert);
args.tx.get_view(this->network.node_certs)
->put(verifier->raw_cert_data(), node_id);

return jsonrpc::success(nlohmann::json(JoinNetwork::Out{node_id}));
};
install_with_auto_schema<NodeInfo, JoinNetwork::Out>(
MemberProcs::ADD_NODE, add_node, Write);
}
};
} // namespace ccf
@@ -7,11 +7,6 @@

namespace ccf
{
DECLARE_JSON_TYPE(JoinNetwork::In)
DECLARE_JSON_REQUIRED_FIELDS(JoinNetwork::In, network_cert, hostname, service)
DECLARE_JSON_TYPE(JoinNetwork::Out)
DECLARE_JSON_REQUIRED_FIELDS(JoinNetwork::Out, id)

DECLARE_JSON_ENUM(
GetSignedIndex::State,
{{GetSignedIndex::State::ReadingPublicLedger, "readingPublicLedger"},
@@ -20,29 +20,26 @@ def run(args):
) as network:
primary, others = network.start_and_join(args)

# add a valid node
res = network.create_and_add_node("libloggingenc", args)
assert res[0]
new_node = res[1]
# TODO: For now, node is added straight away, without validation by
# the consortium. See https://github.com/microsoft/CCF/issues/293
LOG.debug("Add a valid node")
new_node = network.create_and_add_node("libloggingenc", args)

# attempt to add a node having the host and port fields
# similar to a the ones of an existing node
with primary.management_client() as mc:
check_commit = infra.ccf.Checker(mc)

with new_node.user_client(format="json") as c:
check_commit(
c.rpc("LOG_record", {"id": 42, "msg": "Hello world"}), result=True
)

LOG.debug("Add an invalid node (code id not known)")
assert (
network.add_node(new_node.remote.info()).error["code"]
== infra.jsonrpc.ErrorCode.INVALID_PARAMS
)
network.create_and_add_node("libluagenericenc", args) == None
), "Adding node with unknown code id should fail"

# add an invalid node
assert network.create_and_add_node("libluagenericenc", args, False) == (
False,
infra.jsonrpc.ErrorCode.CODE_ID_NOT_FOUND,
)

new_node.join_network(network)
network.wait_for_node_commit_sync()

# retire a node
network.retire_node(1, primary, new_node.node_id)
LOG.debug("Retire node")
network.retire_node(primary, 0)

if __name__ == "__main__":
@@ -25,11 +25,11 @@ def get_code_id(lib_path):
return lines[0].split("=")[1]

def add_new_code(network, primary, new_code_id):
LOG.debug(f"New code id: {new_code_id}")
def add_new_code(network, new_code_id):
LOG.debug(f"Adding new code id: {new_code_id}")

# first propose adding the new code id
result = network.propose(1, primary, "add_code", f"--new_code_id={new_code_id}")
primary, _ = network.find_primary()
result = network.propose(1, primary, "add_code", f"--new-code-id={new_code_id}")

network.vote_using_majority(primary, result[1]["id"], True)

@@ -50,53 +50,46 @@ def run(args):
) as network:
primary, others = network.start_and_join(args)

forwarded_args = {
arg: getattr(args, arg) for arg in infra.ccf.Network.node_args_to_forward
}

res, new_node = network.create_and_add_node(args.package, args, True)
assert res
new_node.join_network(network)
new_node = network.create_and_add_node(args.package, args)
assert new_node

new_code_id = get_code_id(f"{args.patched_file_name}.so.signed")

# try to add a node using unsupported code
assert network.create_and_add_node(args.patched_file_name, args, False) == (
False,
infra.jsonrpc.ErrorCode.CODE_ID_NOT_FOUND,
)
LOG.debug(f"Adding a node with unsupported code id {new_code_id}")
assert (
network.create_and_add_node(args.patched_file_name, args) == None
), "Adding node with unsupported code id should fail"

add_new_code(network, primary, new_code_id)
add_new_code(network, new_code_id)

LOG.debug("Replacing all nodes with previous code version with new code")
new_nodes = set()
old_nodes_count = len(network.nodes)
# add nodes using the same code id that failed earlier
for i in range(0, old_nodes_count + 1):
LOG.debug(f"Adding node using new code")
res, new_node = network.create_and_add_node(args.patched_file_name, args)
assert res
new_node.join_network(network)
new_nodes.add(new_node)

network.wait_for_node_commit_sync()
LOG.debug("Adding more new nodes than originally existed")
for _ in range(0, old_nodes_count + 1):
new_node = network.create_and_add_node(args.patched_file_name, args)
assert new_node
new_nodes.add(new_node)

for node in new_nodes:
new_primary = node
break

LOG.debug("Stopping all original nodes")
old_nodes = set(network.nodes).difference(new_nodes)
for node in old_nodes:
LOG.debug(f"Stopping node {node.node_id}")
node.stop()

# wait for a new primary to be elected
LOG.debug("Waiting for a new primary to be elected...")
time.sleep(args.election_timeout * 6 / 1000)

new_primary = network.find_primary()[0]
new_primary, _ = network.find_primary()
LOG.debug(f"Waited, new_primary is {new_primary.node_id}")
res, new_node = network.create_and_add_node(args.patched_file_name, args)
assert res
new_node.join_network(network)

new_node = network.create_and_add_node(args.patched_file_name, args)
assert new_node
network.wait_for_node_commit_sync()
@@ -73,9 +73,6 @@ def cli_args(add=lambda x: None, parser=None, accept_unknown=False):
help="If set, the python client is used to query joining nodes",
action="store_true",
)
parser.add_argument(
"--node-status", help="pending, trusted, retired", type=str, action="append"
)
parser.add_argument(
"--election-timeout",
help="Maximum election timeout for each node in the network",
@@ -82,7 +82,7 @@ def run(args):
LOG.debug("Stopping primary")
primary.stop()

# Wait for next election to complete
LOG.debug("Waiting for a new primary to be elected...")
time.sleep(max_election_duration)

# More than F nodes have been stopped, trying to commit any message
@@ -115,10 +115,6 @@ class Network:

hosts = self.hosts or ["localhost"] * number_of_local_nodes()

node_status = args.node_status or ["pending"] * len(hosts)
if len(node_status) != len(hosts):
raise ValueError("Node statuses are not equal to number of nodes.")

if not args.package:
raise ValueError("A package name must be specified.")

@@ -140,22 +136,25 @@ class Network:
arg: dict_args[arg] for arg in Network.node_args_to_forward
}
try:
primary, _ = self.find_primary() if i != 0 else (None, None)
node.start(
infra.remote.StartType.start
if i == 0
else infra.remote.StartType.join,
lib_name=args.package,
node_status=node_status[i],
workspace=args.workspace,
label=args.label,
target_rpc_address=f"{primary.host}:{primary.rpc_port}"
if primary
else None,
members_certs="member*_cert.pem" if i == 0 else None,
users_certs="user*_cert.pem" if i == 0 else None,
**forwarded_args,
)
if i == 0:
node.start(
lib_name=args.package,
workspace=args.workspace,
label=args.label,
members_certs="member*_cert.pem",
users_certs="user*_cert.pem",
**forwarded_args,
)
else:
primary, _ = self.find_primary()
node.join(
lib_name=args.package,
workspace=args.workspace,
label=args.label,
target_rpc_address=f"{primary.host}:{primary.rpc_port}",
**forwarded_args,
)
node.wait_for_node_to_join()
node.network_state = NodeNetworkState.joined
except Exception:
LOG.exception("Failed to start node {}".format(i))
@@ -165,7 +164,7 @@ class Network:
if primary is None:
primary = self.nodes[0]

self.wait_for_all_nodes_have_joined(primary)
self.wait_for_all_nodes_to_catch_up(primary)
self.check_for_service(primary)
LOG.success("All nodes joined network")

@@ -174,10 +173,6 @@ class Network:
def start_in_recovery(self, args, ledger_file, sealed_secrets):
hosts = self.hosts or ["localhost"] * number_of_local_nodes()

node_status = args.node_status or ["pending"] * len(hosts)
if len(node_status) != len(hosts):
raise ValueError("Node statuses are not equal to number of nodes.")

if not args.package:
raise ValueError("A package name must be specified.")

@@ -201,10 +196,8 @@ class Network:
# start all nodes with their own ledger to find out which ledger
# is the longest. Then, all nodes except the ones with the
# longest ledger are stopped and restarted in "join".
self.nodes[0].start(
start_type=infra.remote.StartType.recover,
self.nodes[0].recover(
lib_name=args.package,
node_status=node_status[0],
ledger_file=ledger_file,
sealed_secrets=sealed_secrets,
workspace=args.workspace,

@@ -216,20 +209,19 @@ class Network:
LOG.exception("Failed to start recovery node {}".format(i))
raise

for i, node in enumerate(self.nodes):
for node in self.nodes:
if node != primary:
node.start(
infra.remote.StartType.join,
node.join(
lib_name=args.package,
node_status=node_status[i],
workspace=args.workspace,
label=args.label,
target_rpc_address=f"{primary.host}:{primary.rpc_port}",
**forwarded_args,
)
node.wait_for_node_to_join()
node.network_state = NodeNetworkState.joined

self.wait_for_all_nodes_have_joined(primary)
self.wait_for_all_nodes_to_catch_up(primary)
self.check_for_service(primary, status=ServiceStatus.OPENING)

LOG.success("All nodes joined recoverd public network")
@@ -274,40 +266,33 @@ class Network:
def remove_last_node(self):
last_node = self.nodes.pop()

def add_node(self, new_node_info):
with self.find_primary()[0].member_client(format="json") as member_client:
j_result = member_client.rpc("add_node", new_node_info)

return j_result

def create_and_add_node(
self, lib_name, args, should_succeed=True, local_node_id=None
):
def create_and_add_node(self, lib_name, args):
forwarded_args = {
arg: getattr(args, arg) for arg in infra.ccf.Network.node_args_to_forward
}
if local_node_id is None:
local_node_id = self.get_next_local_node_id()
node_status = args.node_status or "pending"
local_node_id = self.get_next_local_node_id()
new_node = self.create_node(local_node_id, "localhost")
new_node.start(

primary, _ = self.find_primary()
new_node.join(
lib_name=lib_name,
node_status=node_status,
workspace=args.workspace,
label=args.label,
target_rpc_address=f"{primary.host}:{primary.rpc_port}",
**forwarded_args,
)
new_node_info = new_node.remote.info()

j_result = self.add_node(new_node_info)
try:
new_node.wait_for_node_to_join()
except RuntimeError:
LOG.error(f"New node {local_node_id} failed to join the network")
self.nodes.remove(new_node)
return None

if j_result.error is not None:
self.remove_last_node()
return (False, j_result.error["code"])

new_node.node_id = j_result.result["id"]

return (True, new_node)
new_node.network_state = NodeNetworkState.joined
self.wait_for_all_nodes_to_catch_up(primary)
LOG.success(f"New node {local_node_id} joined the network")
return new_node

def add_members(self, members):
self.members.extend(members)

@@ -379,13 +364,14 @@ class Network:
member_id, remote_node, "retire_node", f"--node-id={node_id}"
)

def retire_node(self, member_id, remote_node, node_id):
def retire_node(self, remote_node, node_id):
member_id = 1
result = self.propose_retire_node(member_id, remote_node, node_id)
proposal_id = result[1]["id"]
result = self.vote_using_majority(remote_node, proposal_id, True)

with remote_node.member_client() as c:
id = c.request("read", {"table": "nodes", "key": node_id})
id = c.request("read", {"table": "ccf.nodes", "key": node_id})
assert c.response(id).result["status"].decode() == "RETIRED"

def propose_add_member(self, member_id, remote_node, new_member_cert):

@@ -418,7 +404,8 @@ class Network:

def find_primary(self):
"""
Find the identity of the primary in the network and return its identity and the current term.
Find the identity of the primary in the network and return its identity
and the current term.
"""
primary_id = None
term = None
@@ -439,49 +426,12 @@ class Network:

return (self.get_node_by_id(primary_id), term)

def update_nodes(self):
primary = self.find_primary()[0]
with primary.management_client() as c:
id = c.request("getNetworkInfo", {})
res = c.response(id)

# this is a json array of all the nodes in TRUSTED state
active_nodes = res.result["nodes"]

active_local_nodes = list(filter(lambda node: node.is_joined(), self.nodes))
assert len(active_nodes) == len(
active_local_nodes
), f"active node count ({len(active_nodes)}) does not match active local nodes ({len(active_local_nodes)})"

for node in active_nodes:
port = int(node["port"].decode())
local_node = next(
(
local_node
for local_node in active_local_nodes
if local_node.rpc_port == port
),
None,
)
# make sure we know all the nodes
assert (
local_node
), f"The node {str(node['host'])}:{port} is not known to the local network environment"

node_id = int(node["node_id"])
if local_node.node_id != node_id:
local_node.node_id = node_id
LOG.info(
"Correcting node id for {local_node.node_id} to be {node_id}"
)

def wait_for_all_nodes_have_joined(self, primary, timeout=3):
def wait_for_all_nodes_to_catch_up(self, primary, timeout=3):
"""
Wait for all nodes to have joined the network and globally replicated
all transactions executed on the primary (including the transactions
which added the nodes).
"""

with primary.management_client() as c:
res = c.do("getCommit", {})
local_commit_leader = res.commit

@@ -489,7 +439,7 @@ class Network:

for _ in range(timeout):
joined_nodes = 0
for node in (node for node in self.nodes if node.is_joined()):
for node in self.get_running_nodes():
with node.management_client() as c:
id = c.request("getCommit", {})
resp = c.response(id)

@@ -501,12 +451,12 @@ class Network:
and resp.result["term"] == term_leader
):
joined_nodes += 1
if joined_nodes == len(self.nodes):
if joined_nodes == self.get_running_nodes():
break
time.sleep(1)
assert joined_nodes == len(
self.nodes
), f"Only {joined_nodes} (out of {len(self.nodes)}) nodes have joined the network"
self.get_running_nodes()
), f"Only {joined_nodes} (out of {self.get_running_nodes()}) nodes have joined the network"

def wait_for_node_commit_sync(self, timeout=3):
"""
@@ -642,6 +592,51 @@ class Node:
self.node_port = probably_free_function(self.host)

def start(
self,
lib_name,
enclave_type,
workspace,
label,
members_certs,
users_certs,
**kwargs,
):
self._start(
infra.remote.StartType.new,
lib_name,
enclave_type,
workspace,
label,
None,
members_certs,
users_certs,
**kwargs,
)

def join(
self, lib_name, enclave_type, workspace, label, target_rpc_address, **kwargs
):
self._start(
infra.remote.StartType.join,
lib_name,
enclave_type,
workspace,
label,
target_rpc_address,
**kwargs,
)

def recover(self, lib_name, enclave_type, workspace, label, **kwargs):
self._start(
infra.remote.StartType.recover,
lib_name,
enclave_type,
workspace,
label,
**kwargs,
)

def _start(
self,
start_type,
lib_name,

@@ -712,10 +707,19 @@ class Node:

def is_joined(self):
return self.network_state == NodeNetworkState.joined
# TODO: Address network_state - what is it used for?

def restart(self):
self.remote.restart()
def wait_for_node_to_join(self, timeout=3):
"""
This function can be used to check that a node has successfully
joined a network and that it is part of the consensus.
"""
for _ in range(timeout):
with self.management_client() as mc:
rep = mc.do("getCommit", {})
if rep.error == None and rep.result is not None:
return
time.sleep(1)
raise RuntimeError(f"Node {self.node_id} failed to join the network")

def get_sealed_secrets(self):
return self.remote.get_sealed_secrets()
@@ -128,7 +128,6 @@ class SSHRemote(CmdMixin):
setup() connects, creates the directory and ships over the files
start() runs the specified command
stop() disconnects, which shuts down the command via SIGHUP
restart() reconnects and reruns the specified command
"""
self.hostname = hostname
# For SSHRemote, both executable files (host and enclave) and data

@@ -248,10 +247,6 @@ class SSHRemote(CmdMixin):
)
self.client.close()

def restart(self):
self._connect()
self.start()

def setup(self):
"""
Connect to the remote host, empty the temporary directory if it exsits,

@@ -407,9 +402,6 @@ class LocalRemote(CmdMixin):
self.stderr.close()
log_errors(self.out, self.err)

def restart(self):
self.start()

def setup(self):
"""
Empty the temporary directory if it exists,

@@ -477,7 +469,6 @@ class CCFRemote(object):
ignore_quote=False,
sig_max_tx=1000,
sig_max_ms=1000,
node_status="pending",
election_timeout=1000,
memory_reserve_startup=0,
notify_server=None,

@@ -498,7 +489,6 @@ class CCFRemote(object):
self.rpc_port = rpc_port
self.pem = "{}.pem".format(local_node_id)
self.quote = None
self.node_status = node_status
# Only expect a quote if the enclave is not virtual and quotes have
# not been explictly ignored
if enclave_type != "virtual" and not ignore_quote:

@@ -559,7 +549,7 @@ class CCFRemote(object):
if self.quote:
cmd += [f"--quote-file={self.quote}"]

if start_type == StartType.start:
if start_type == StartType.new:
cmd += [
"start",
"--network-cert-file=networkcert.pem",

@@ -616,32 +606,9 @@ class CCFRemote(object):
def start(self):
self.remote.start()
self.remote.get(self.pem)
if self.start_type in {StartType.start, StartType.recover}:
if self.start_type in {StartType.new, StartType.recover}:
self.remote.get("networkcert.pem")

def restart(self):
self.remote.restart()

def info(self):
self.remote.get(self.pem)
quote_bytes = []
if self.quote:
self.remote.get(self.quote)
quote_bytes = infra.path.quote_bytes(self.quote)

return {
"host": self.host,
"nodeport": str(self.node_port),
"pubhost": self.pubhost,
"rpcport": str(self.rpc_port),
"cert": infra.path.cert_bytes(self.pem),
"quote": quote_bytes,
"status": NodeStatus[self.node_status].value,
}

def node_cmd(self):
return self.remote._cmd()

def debug_node_cmd(self):
return self.remote._dbg()

@@ -710,6 +677,6 @@ class NodeStatus(Enum):

class StartType(Enum):
start = 0
new = 0
join = 1
recover = 2
@@ -71,12 +71,6 @@ class CCFRemoteClient(object):
def start(self):
self.remote.start()

def restart(self):
self.remote.restart()

def node_cmd(self):
return self.remote._cmd()

def debug_node_cmd(self):
return self.remote._dbg()