Re-enable code update and add node tests (#362)

This commit is contained in:
Julien Maffre 2019-09-10 11:34:21 +01:00 коммит произвёл GitHub
Родитель 0ce7460cfa
Коммит 30c1c230fb
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
21 изменённых файлов: 183 добавлений и 408 удалений

Просмотреть файл

@ -283,22 +283,20 @@ if(BUILD_TESTS)
--oesign ${OESIGN}
)
# TODO: Re-enable these tests
# add_e2e_test(
# NAME add_node_test
# PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/addnode.py
# )
add_e2e_test(
NAME add_node_test
PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/addnode.py
)
## Code update test
# add_e2e_test(
# NAME code_update_test
# PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/code_update.py
# ADDITIONAL_ARGS
# --oesign ${OESIGN}
# --oeconfpath ${CMAKE_CURRENT_SOURCE_DIR}/src/apps/logging/oe_sign.conf
# --oesignkeypath ${CMAKE_CURRENT_SOURCE_DIR}/src/apps/sample_key.pem
# --election-timeout 1500
# )
add_e2e_test(
NAME code_update_test
PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/code_update.py
ADDITIONAL_ARGS
--oesign ${OESIGN}
--oeconfpath ${CMAKE_CURRENT_SOURCE_DIR}/src/apps/logging/oe_sign.conf
--oesignkeypath ${CMAKE_CURRENT_SOURCE_DIR}/src/apps/sample_key.pem
--election-timeout 1500
)
endif()
add_e2e_test(
@ -313,8 +311,6 @@ if(BUILD_TESTS)
--scenario ${CMAKE_SOURCE_DIR}/tests/simple_logging_scenario.json
)
# TODO: These tests make use of dynamic node configuration that is not
# yet supported by PBFT
add_e2e_test(
NAME election_tests
PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/election.py

Просмотреть файл

@ -20,7 +20,7 @@ For example, ``member1`` may submit a proposal to add a new member (``member4``)
.. code-block:: bash
$ memberclient --server-address 127.83.203.69:55526 --cert member1_cert.pem --privk member1_privk.pem --ca networkcert.pem add_member --member-cert member4_cert.pem
$ memberclient --rpc-address 127.83.203.69:55526 --cert member1_cert.pem --privk member1_privk.pem --ca networkcert.pem add_member --member-cert member4_cert.pem
{"commit":100,"global_commit":99,"id":0,"jsonrpc":"2.0","result":{"completed":false,"id":1},"term":2}
In this case, a new proposal with id ``1`` has successfully been created and the proposer member has automatically accepted it. Other members can then accept or reject the proposal:
@ -30,12 +30,12 @@ In this case, a new proposal with id ``1`` has successfully been created and the
// Proposal 1 is already created by member 1 (votes: 1/3)
// Member 2 rejects the proposal (votes: 1/3)
$ memberclient --server-address 127.83.203.69:55526 --cert member2_cert.pem --privk member2_privk.pem --ca networkcert.pem vote --reject --proposal-id 1
$ memberclient --rpc-address 127.83.203.69:55526 --cert member2_cert.pem --privk member2_privk.pem --ca networkcert.pem vote --reject --proposal-id 1
{"commit":104,"global_commit":103,"id":0,"jsonrpc":"2.0","result":false,"term":2}
// Member 3 accepts the proposal (votes: 2/3)
// As a quorum of members have accepted the proposal, member4 is added to the consortium
$ memberclient --server-address 127.83.203.69:55526 --cert member3_cert.pem --privk member3_privk.pem --ca networkcert.pem vote --accept --proposal-id 1
$ memberclient --rpc-address 127.83.203.69:55526 --cert member3_cert.pem --privk member3_privk.pem --ca networkcert.pem vote --accept --proposal-id 1
{"commit":106,"global_commit":105,"id":0,"jsonrpc":"2.0","result":true,"term":2}
As soon as ``member3`` accepts the proposal, a quorum (2 out of 3) of members has been reached and the proposal completes, successfully adding ``member4``.
@ -44,7 +44,7 @@ As soon as ``member3`` accepts the proposal, a quorum (2 out of 3) of members ha
.. code-block:: bash
$ memberclient --server-address 127.83.203.69:55526 --cert member4_cert.pem --privk member4_privk.pem --ca networkcert.pem ack
$ memberclient --rpc-address 127.83.203.69:55526 --cert member4_cert.pem --privk member4_privk.pem --ca networkcert.pem ack
{"commit":108,"global_commit":107,"id":2,"jsonrpc":"2.0","result":true,"term":2}
@ -55,7 +55,7 @@ The details of pending proposals, including the proposer member ID, proposal scr
.. code-block:: bash
$ memberclient --server-address 127.83.203.69:55526 --cert member1_cert.pem --privk member1_privk.pem --ca networkcert.pem proposal_display
$ memberclient --rpc-address 127.83.203.69:55526 --cert member1_cert.pem --privk member1_privk.pem --ca networkcert.pem proposal_display
{
"1": {
"parameter": [...],
@ -89,7 +89,7 @@ At any stage during the voting process and before the proposal is completed, the
.. code-block:: bash
$ memberclient --server-address 127.83.203.69:55526 --cert member1_cert.pem --privk member1_privk.pem --ca networkcert.pem withdraw --proposal-id 0
$ memberclient --rpc-address 127.83.203.69:55526 --cert member1_cert.pem --privk member1_privk.pem --ca networkcert.pem withdraw --proposal-id 0
{"commit":110,"global_commit":109,"id":0,"jsonrpc":"2.0","result":true,"term":4}
This means future votes will be ignored, and the proposal will never be accepted. However it will remain visible as a proposal so members can easily audit historic proposals.

Просмотреть файл

@ -1,51 +0,0 @@
{
"$schema": "http://json-schema.org/draft-07/schema#",
"properties": {
"cert": {
"items": {
"maximum": 255,
"minimum": 0,
"type": "number"
},
"type": "array"
},
"host": {
"type": "string"
},
"nodeport": {
"type": "string"
},
"pubhost": {
"type": "string"
},
"quote": {
"items": {
"maximum": 255,
"minimum": 0,
"type": "number"
},
"type": "array"
},
"rpcport": {
"type": "string"
},
"status": {
"enum": [
"PENDING",
"TRUSTED",
"RETIRED"
]
}
},
"required": [
"host",
"pubhost",
"nodeport",
"rpcport",
"cert",
"quote",
"status"
],
"title": "add_node/params",
"type": "object"
}

Просмотреть файл

@ -1,15 +0,0 @@
{
"$schema": "http://json-schema.org/draft-07/schema#",
"properties": {
"id": {
"maximum": 18446744073709551615,
"minimum": 0,
"type": "number"
}
},
"required": [
"id"
],
"title": "add_node/result",
"type": "object"
}

Просмотреть файл

@ -89,11 +89,15 @@ The following diagram summarises the steps required to bootstrap a CCF network:
Updating enclave code
~~~~~~~~~~~~~~~~~~~~~
.. warning:: Further details required.
For new nodes to be able to join the network, the version of the code they run (as specified by the ``--enclave-file``) should be first trusted by the consortium of members.
If the version of the code being executed needs to be updated (for example, to support additional JSON-RPC endpoints), memebrs can create a ``new_code`` proposal, specifying the new code version. Once the proposal has been accepted, nodes running the new code are authorised join the network. This allows stopping nodes running older versions of the code.
If the version of the code being executed needs to be updated (for example, to support additional JSON-RPC endpoints), members can create a ``new_code`` proposal, specifying the new code version (e.g. ``3175971c02d00c1a8f9dd23ca89e64955c5caa94e24f4a3a0579dcfb2e6aebf9``):
.. code-block:: bash
memberclient --cert member_cert --privk member_privk --rpc-address node_ip:node_port --ca network_cert add_code --new-code-id code_version
Once the proposal has been accepted, nodes running the new code are authorised join the network. This allows stopping nodes running older versions of the code.
.. note:: It is important to keep the code compatible with the previous version, since there will be a point in time in which the new code is running on at least one node, while the other version is running on a different node.
@ -127,4 +131,4 @@ There is an option to further generate machine-readable logs for monitoring. To
.. rubric:: Footnotes
.. [#remote_attestation] When a new node joins an existing network, the network performs the remote attestation protocol by verifying the joining node's quote. It also checks that the version of the code running by the joining node known is trusted by the consortium.
.. [#remote_attestation] When a new node joins an existing network, the network performs the remote attestation protocol by verifying the joining node's quote. It also checks that the version of the code running by the joining node is trusted by the consortium.

Просмотреть файл

@ -157,24 +157,6 @@ void submit_retire_node(RpcTlsClient& tls_connection, NodeId node_id)
cout << response.dump() << endl;
}
NodeId submit_add_node(RpcTlsClient& tls_connection, NodeInfo& node_info)
{
const auto response =
json::from_msgpack(tls_connection.call("add_node", node_info));
cout << response.dump() << endl;
auto result = response.find("result");
if (result == response.end())
return INVALID_NODE_ID;
auto ret_id = result->find("id");
if (ret_id == result->end())
return INVALID_NODE_ID;
return *ret_id;
}
void submit_accept_recovery(
RpcTlsClient& tls_connection, const string& sealed_secrets_file)
{
@ -323,13 +305,6 @@ int main(int argc, char** argv)
auto ack =
app.add_subcommand("ack", "Acknowledge self added into the network");
std::string nodes_file;
auto add_node = app.add_subcommand("add_node", "Add a node");
add_node
->add_option(
"--nodes-to-add", nodes_file, "The file containing the nodes to be added")
->required(true);
std::string new_code_id;
auto add_code = app.add_subcommand("add_code", "Support executing new code");
add_code
@ -415,22 +390,6 @@ int main(int argc, char** argv)
add_new(*tls_connection, user_cert_file, add_user_proposal);
}
if (*add_node)
{
const auto j_nodes = files::slurp_json(nodes_file);
if (!j_nodes.is_array())
{
throw logic_error("Expected " + nodes_file + " to contain array");
}
for (auto node : j_nodes)
{
NodeInfo node_info = node;
submit_add_node(*tls_connection, node_info);
}
}
if (*add_code)
{
submit_accept_code(*tls_connection, new_code_id);

Просмотреть файл

@ -117,7 +117,7 @@ namespace enclave
::memcpy(quote, r.first.quote.data(), r.first.quote.size());
*quote_len = r.first.quote.size();
if (start_type == StartType::Start || start_type == StartType::Recover)
if (start_type == StartType::New || start_type == StartType::Recover)
{
// When starting a node in start or recover modes, fresh network secrets
// are created and the associated certificate can be passed to the host

Просмотреть файл

@ -4,7 +4,7 @@
enum StartType
{
Start = 1,
New = 1,
Join = 2,
Recover = 3
};

Просмотреть файл

@ -325,7 +325,7 @@ int main(int argc, char** argv)
if (*start)
{
LOG_INFO_FMT("Creating new node - new network");
start_type = StartType::Start;
start_type = StartType::New;
ccf_config.genesis.member_certs = files::slurp_certs(member_cert_file);
ccf_config.genesis.user_certs = files::slurp_certs(user_cert_file);
ccf_config.genesis.gov_script = files::slurp_string(gov_script);

Просмотреть файл

@ -208,7 +208,7 @@ namespace ccf
switch (args.start_type)
{
case StartType::Start:
case StartType::New:
{
GenesisGenerator g(network);
g.init_values();

Просмотреть файл

@ -142,20 +142,4 @@ namespace ccf
int64_t version; // Current version of the network secrets
};
};
// TODO: It seems that we still use this for add_node in memberfrontend.h
struct JoinNetwork
{
struct In
{
std::vector<uint8_t> network_cert;
std::string hostname;
std::string service;
};
struct Out
{
NodeId id;
};
};
}

Просмотреть файл

@ -41,8 +41,6 @@ namespace ccf
static constexpr auto PROPOSE = "propose";
static constexpr auto WITHDRAW = "withdraw";
static constexpr auto ADD_NODE = "add_node";
static constexpr auto ACK = "ack";
static constexpr auto UPDATE_ACK_NONCE = "updateAckNonce";
};

Просмотреть файл

@ -435,53 +435,6 @@ namespace ccf
};
install_with_auto_schema<void, bool>(
MemberProcs::UPDATE_ACK_NONCE, update_ack_nonce, Write);
// Add a new node
auto add_node = [this](RequestArgs& args) {
NodeInfo new_node = args.params;
#ifdef GET_QUOTE
QuoteVerificationResult verify_result = QuoteVerifier::verify_quote(
args.tx, this->network, new_node.quote, new_node.cert);
if (verify_result != QuoteVerificationResult::VERIFIED)
return QuoteVerifier::quote_verification_error_to_json(verify_result);
#endif
auto nodes_view = args.tx.get_view(this->network.nodes);
NodeId duplicate_node_id = NoNode;
nodes_view->foreach([&new_node, &duplicate_node_id](
const NodeId& nid, const NodeInfo& ni) {
if (
new_node.rpcport == ni.rpcport && new_node.host == ni.host &&
ni.status != NodeStatus::RETIRED)
{
duplicate_node_id = nid;
return false;
}
return true;
});
if (duplicate_node_id != NoNode)
return jsonrpc::error(
jsonrpc::StandardErrorCodes::INVALID_PARAMS,
fmt::format(
"A node with the same host {} and port {} already exists (node "
"id: {})",
new_node.host,
new_node.rpcport,
duplicate_node_id));
const auto node_id = get_next_id(
args.tx.get_view(this->network.values), ValueIds::NEXT_NODE_ID);
new_node.status = NodeStatus::PENDING;
nodes_view->put(node_id, new_node);
// TODO: We don't use verifier here, is it needed? Perhaps it is
// canonicalising the cert?
auto verifier = tls::make_verifier(new_node.cert);
args.tx.get_view(this->network.node_certs)
->put(verifier->raw_cert_data(), node_id);
return jsonrpc::success(nlohmann::json(JoinNetwork::Out{node_id}));
};
install_with_auto_schema<NodeInfo, JoinNetwork::Out>(
MemberProcs::ADD_NODE, add_node, Write);
}
};
} // namespace ccf

Просмотреть файл

@ -7,11 +7,6 @@
namespace ccf
{
DECLARE_JSON_TYPE(JoinNetwork::In)
DECLARE_JSON_REQUIRED_FIELDS(JoinNetwork::In, network_cert, hostname, service)
DECLARE_JSON_TYPE(JoinNetwork::Out)
DECLARE_JSON_REQUIRED_FIELDS(JoinNetwork::Out, id)
DECLARE_JSON_ENUM(
GetSignedIndex::State,
{{GetSignedIndex::State::ReadingPublicLedger, "readingPublicLedger"},

Просмотреть файл

@ -20,29 +20,26 @@ def run(args):
) as network:
primary, others = network.start_and_join(args)
# add a valid node
res = network.create_and_add_node("libloggingenc", args)
assert res[0]
new_node = res[1]
# TODO: For now, node is added straight away, without validation by
# the consortium. See https://github.com/microsoft/CCF/issues/293
LOG.debug("Add a valid node")
new_node = network.create_and_add_node("libloggingenc", args)
# attempt to add a node having the host and port fields
# similar to a the ones of an existing node
with primary.management_client() as mc:
check_commit = infra.ccf.Checker(mc)
with new_node.user_client(format="json") as c:
check_commit(
c.rpc("LOG_record", {"id": 42, "msg": "Hello world"}), result=True
)
LOG.debug("Add an invalid node (code id not known)")
assert (
network.add_node(new_node.remote.info()).error["code"]
== infra.jsonrpc.ErrorCode.INVALID_PARAMS
)
network.create_and_add_node("libluagenericenc", args) == None
), "Adding node with unknown code id should fail"
# add an invalid node
assert network.create_and_add_node("libluagenericenc", args, False) == (
False,
infra.jsonrpc.ErrorCode.CODE_ID_NOT_FOUND,
)
new_node.join_network(network)
network.wait_for_node_commit_sync()
# retire a node
network.retire_node(1, primary, new_node.node_id)
LOG.debug("Retire node")
network.retire_node(primary, 0)
if __name__ == "__main__":

Просмотреть файл

@ -25,11 +25,11 @@ def get_code_id(lib_path):
return lines[0].split("=")[1]
def add_new_code(network, primary, new_code_id):
LOG.debug(f"New code id: {new_code_id}")
def add_new_code(network, new_code_id):
LOG.debug(f"Adding new code id: {new_code_id}")
# first propose adding the new code id
result = network.propose(1, primary, "add_code", f"--new_code_id={new_code_id}")
primary, _ = network.find_primary()
result = network.propose(1, primary, "add_code", f"--new-code-id={new_code_id}")
network.vote_using_majority(primary, result[1]["id"], True)
@ -50,53 +50,46 @@ def run(args):
) as network:
primary, others = network.start_and_join(args)
forwarded_args = {
arg: getattr(args, arg) for arg in infra.ccf.Network.node_args_to_forward
}
res, new_node = network.create_and_add_node(args.package, args, True)
assert res
new_node.join_network(network)
new_node = network.create_and_add_node(args.package, args)
assert new_node
new_code_id = get_code_id(f"{args.patched_file_name}.so.signed")
# try to add a node using unsupported code
assert network.create_and_add_node(args.patched_file_name, args, False) == (
False,
infra.jsonrpc.ErrorCode.CODE_ID_NOT_FOUND,
)
LOG.debug(f"Adding a node with unsupported code id {new_code_id}")
assert (
network.create_and_add_node(args.patched_file_name, args) == None
), "Adding node with unsupported code id should fail"
add_new_code(network, primary, new_code_id)
add_new_code(network, new_code_id)
LOG.debug("Replacing all nodes with previous code version with new code")
new_nodes = set()
old_nodes_count = len(network.nodes)
# add nodes using the same code id that failed earlier
for i in range(0, old_nodes_count + 1):
LOG.debug(f"Adding node using new code")
res, new_node = network.create_and_add_node(args.patched_file_name, args)
assert res
new_node.join_network(network)
new_nodes.add(new_node)
network.wait_for_node_commit_sync()
LOG.debug("Adding more new nodes than originally existed")
for _ in range(0, old_nodes_count + 1):
new_node = network.create_and_add_node(args.patched_file_name, args)
assert new_node
new_nodes.add(new_node)
for node in new_nodes:
new_primary = node
break
LOG.debug("Stopping all original nodes")
old_nodes = set(network.nodes).difference(new_nodes)
for node in old_nodes:
LOG.debug(f"Stopping node {node.node_id}")
node.stop()
# wait for a new primary to be elected
LOG.debug("Waiting for a new primary to be elected...")
time.sleep(args.election_timeout * 6 / 1000)
new_primary = network.find_primary()[0]
new_primary, _ = network.find_primary()
LOG.debug(f"Waited, new_primary is {new_primary.node_id}")
res, new_node = network.create_and_add_node(args.patched_file_name, args)
assert res
new_node.join_network(network)
new_node = network.create_and_add_node(args.patched_file_name, args)
assert new_node
network.wait_for_node_commit_sync()

Просмотреть файл

@ -73,9 +73,6 @@ def cli_args(add=lambda x: None, parser=None, accept_unknown=False):
help="If set, the python client is used to query joining nodes",
action="store_true",
)
parser.add_argument(
"--node-status", help="pending, trusted, retired", type=str, action="append"
)
parser.add_argument(
"--election-timeout",
help="Maximum election timeout for each node in the network",

Просмотреть файл

@ -82,7 +82,7 @@ def run(args):
LOG.debug("Stopping primary")
primary.stop()
# Wait for next election to complete
LOG.debug("Waiting for a new primary to be elected...")
time.sleep(max_election_duration)
# More than F nodes have been stopped, trying to commit any message

Просмотреть файл

@ -115,10 +115,6 @@ class Network:
hosts = self.hosts or ["localhost"] * number_of_local_nodes()
node_status = args.node_status or ["pending"] * len(hosts)
if len(node_status) != len(hosts):
raise ValueError("Node statuses are not equal to number of nodes.")
if not args.package:
raise ValueError("A package name must be specified.")
@ -140,22 +136,25 @@ class Network:
arg: dict_args[arg] for arg in Network.node_args_to_forward
}
try:
primary, _ = self.find_primary() if i != 0 else (None, None)
node.start(
infra.remote.StartType.start
if i == 0
else infra.remote.StartType.join,
lib_name=args.package,
node_status=node_status[i],
workspace=args.workspace,
label=args.label,
target_rpc_address=f"{primary.host}:{primary.rpc_port}"
if primary
else None,
members_certs="member*_cert.pem" if i == 0 else None,
users_certs="user*_cert.pem" if i == 0 else None,
**forwarded_args,
)
if i == 0:
node.start(
lib_name=args.package,
workspace=args.workspace,
label=args.label,
members_certs="member*_cert.pem",
users_certs="user*_cert.pem",
**forwarded_args,
)
else:
primary, _ = self.find_primary()
node.join(
lib_name=args.package,
workspace=args.workspace,
label=args.label,
target_rpc_address=f"{primary.host}:{primary.rpc_port}",
**forwarded_args,
)
node.wait_for_node_to_join()
node.network_state = NodeNetworkState.joined
except Exception:
LOG.exception("Failed to start node {}".format(i))
@ -165,7 +164,7 @@ class Network:
if primary is None:
primary = self.nodes[0]
self.wait_for_all_nodes_have_joined(primary)
self.wait_for_all_nodes_to_catch_up(primary)
self.check_for_service(primary)
LOG.success("All nodes joined network")
@ -174,10 +173,6 @@ class Network:
def start_in_recovery(self, args, ledger_file, sealed_secrets):
hosts = self.hosts or ["localhost"] * number_of_local_nodes()
node_status = args.node_status or ["pending"] * len(hosts)
if len(node_status) != len(hosts):
raise ValueError("Node statuses are not equal to number of nodes.")
if not args.package:
raise ValueError("A package name must be specified.")
@ -201,10 +196,8 @@ class Network:
# start all nodes with their own ledger to find out which ledger
# is the longest. Then, all nodes except the ones with the
# longest ledger are stopped and restarted in "join".
self.nodes[0].start(
start_type=infra.remote.StartType.recover,
self.nodes[0].recover(
lib_name=args.package,
node_status=node_status[0],
ledger_file=ledger_file,
sealed_secrets=sealed_secrets,
workspace=args.workspace,
@ -216,20 +209,19 @@ class Network:
LOG.exception("Failed to start recovery node {}".format(i))
raise
for i, node in enumerate(self.nodes):
for node in self.nodes:
if node != primary:
node.start(
infra.remote.StartType.join,
node.join(
lib_name=args.package,
node_status=node_status[i],
workspace=args.workspace,
label=args.label,
target_rpc_address=f"{primary.host}:{primary.rpc_port}",
**forwarded_args,
)
node.wait_for_node_to_join()
node.network_state = NodeNetworkState.joined
self.wait_for_all_nodes_have_joined(primary)
self.wait_for_all_nodes_to_catch_up(primary)
self.check_for_service(primary, status=ServiceStatus.OPENING)
LOG.success("All nodes joined recoverd public network")
@ -274,40 +266,33 @@ class Network:
def remove_last_node(self):
last_node = self.nodes.pop()
def add_node(self, new_node_info):
with self.find_primary()[0].member_client(format="json") as member_client:
j_result = member_client.rpc("add_node", new_node_info)
return j_result
def create_and_add_node(
self, lib_name, args, should_succeed=True, local_node_id=None
):
def create_and_add_node(self, lib_name, args):
forwarded_args = {
arg: getattr(args, arg) for arg in infra.ccf.Network.node_args_to_forward
}
if local_node_id is None:
local_node_id = self.get_next_local_node_id()
node_status = args.node_status or "pending"
local_node_id = self.get_next_local_node_id()
new_node = self.create_node(local_node_id, "localhost")
new_node.start(
primary, _ = self.find_primary()
new_node.join(
lib_name=lib_name,
node_status=node_status,
workspace=args.workspace,
label=args.label,
target_rpc_address=f"{primary.host}:{primary.rpc_port}",
**forwarded_args,
)
new_node_info = new_node.remote.info()
j_result = self.add_node(new_node_info)
try:
new_node.wait_for_node_to_join()
except RuntimeError:
LOG.error(f"New node {local_node_id} failed to join the network")
self.nodes.remove(new_node)
return None
if j_result.error is not None:
self.remove_last_node()
return (False, j_result.error["code"])
new_node.node_id = j_result.result["id"]
return (True, new_node)
new_node.network_state = NodeNetworkState.joined
self.wait_for_all_nodes_to_catch_up(primary)
LOG.success(f"New node {local_node_id} joined the network")
return new_node
def add_members(self, members):
self.members.extend(members)
@ -379,13 +364,14 @@ class Network:
member_id, remote_node, "retire_node", f"--node-id={node_id}"
)
def retire_node(self, member_id, remote_node, node_id):
def retire_node(self, remote_node, node_id):
member_id = 1
result = self.propose_retire_node(member_id, remote_node, node_id)
proposal_id = result[1]["id"]
result = self.vote_using_majority(remote_node, proposal_id, True)
with remote_node.member_client() as c:
id = c.request("read", {"table": "nodes", "key": node_id})
id = c.request("read", {"table": "ccf.nodes", "key": node_id})
assert c.response(id).result["status"].decode() == "RETIRED"
def propose_add_member(self, member_id, remote_node, new_member_cert):
@ -418,7 +404,8 @@ class Network:
def find_primary(self):
"""
Find the identity of the primary in the network and return its identity and the current term.
Find the identity of the primary in the network and return its identity
and the current term.
"""
primary_id = None
term = None
@ -439,49 +426,12 @@ class Network:
return (self.get_node_by_id(primary_id), term)
def update_nodes(self):
primary = self.find_primary()[0]
with primary.management_client() as c:
id = c.request("getNetworkInfo", {})
res = c.response(id)
# this is a json array of all the nodes in TRUSTED state
active_nodes = res.result["nodes"]
active_local_nodes = list(filter(lambda node: node.is_joined(), self.nodes))
assert len(active_nodes) == len(
active_local_nodes
), f"active node count ({len(active_nodes)}) does not match active local nodes ({len(active_local_nodes)})"
for node in active_nodes:
port = int(node["port"].decode())
local_node = next(
(
local_node
for local_node in active_local_nodes
if local_node.rpc_port == port
),
None,
)
# make sure we know all the nodes
assert (
local_node
), f"The node {str(node['host'])}:{port} is not known to the local network environment"
node_id = int(node["node_id"])
if local_node.node_id != node_id:
local_node.node_id = node_id
LOG.info(
"Correcting node id for {local_node.node_id} to be {node_id}"
)
def wait_for_all_nodes_have_joined(self, primary, timeout=3):
def wait_for_all_nodes_to_catch_up(self, primary, timeout=3):
"""
Wait for all nodes to have joined the network and globally replicated
all transactions executed on the primary (including the transactions
which added the nodes).
"""
with primary.management_client() as c:
res = c.do("getCommit", {})
local_commit_leader = res.commit
@ -489,7 +439,7 @@ class Network:
for _ in range(timeout):
joined_nodes = 0
for node in (node for node in self.nodes if node.is_joined()):
for node in self.get_running_nodes():
with node.management_client() as c:
id = c.request("getCommit", {})
resp = c.response(id)
@ -501,12 +451,12 @@ class Network:
and resp.result["term"] == term_leader
):
joined_nodes += 1
if joined_nodes == len(self.nodes):
if joined_nodes == self.get_running_nodes():
break
time.sleep(1)
assert joined_nodes == len(
self.nodes
), f"Only {joined_nodes} (out of {len(self.nodes)}) nodes have joined the network"
self.get_running_nodes()
), f"Only {joined_nodes} (out of {self.get_running_nodes()}) nodes have joined the network"
def wait_for_node_commit_sync(self, timeout=3):
"""
@ -642,6 +592,51 @@ class Node:
self.node_port = probably_free_function(self.host)
def start(
self,
lib_name,
enclave_type,
workspace,
label,
members_certs,
users_certs,
**kwargs,
):
self._start(
infra.remote.StartType.new,
lib_name,
enclave_type,
workspace,
label,
None,
members_certs,
users_certs,
**kwargs,
)
def join(
self, lib_name, enclave_type, workspace, label, target_rpc_address, **kwargs
):
self._start(
infra.remote.StartType.join,
lib_name,
enclave_type,
workspace,
label,
target_rpc_address,
**kwargs,
)
def recover(self, lib_name, enclave_type, workspace, label, **kwargs):
self._start(
infra.remote.StartType.recover,
lib_name,
enclave_type,
workspace,
label,
**kwargs,
)
def _start(
self,
start_type,
lib_name,
@ -712,10 +707,19 @@ class Node:
def is_joined(self):
return self.network_state == NodeNetworkState.joined
# TODO: Address network_state - what is it used for?
def restart(self):
self.remote.restart()
def wait_for_node_to_join(self, timeout=3):
"""
This function can be used to check that a node has successfully
joined a network and that it is part of the consensus.
"""
for _ in range(timeout):
with self.management_client() as mc:
rep = mc.do("getCommit", {})
if rep.error == None and rep.result is not None:
return
time.sleep(1)
raise RuntimeError(f"Node {self.node_id} failed to join the network")
def get_sealed_secrets(self):
return self.remote.get_sealed_secrets()

Просмотреть файл

@ -128,7 +128,6 @@ class SSHRemote(CmdMixin):
setup() connects, creates the directory and ships over the files
start() runs the specified command
stop() disconnects, which shuts down the command via SIGHUP
restart() reconnects and reruns the specified command
"""
self.hostname = hostname
# For SSHRemote, both executable files (host and enclave) and data
@ -248,10 +247,6 @@ class SSHRemote(CmdMixin):
)
self.client.close()
def restart(self):
self._connect()
self.start()
def setup(self):
"""
Connect to the remote host, empty the temporary directory if it exsits,
@ -407,9 +402,6 @@ class LocalRemote(CmdMixin):
self.stderr.close()
log_errors(self.out, self.err)
def restart(self):
self.start()
def setup(self):
"""
Empty the temporary directory if it exists,
@ -477,7 +469,6 @@ class CCFRemote(object):
ignore_quote=False,
sig_max_tx=1000,
sig_max_ms=1000,
node_status="pending",
election_timeout=1000,
memory_reserve_startup=0,
notify_server=None,
@ -498,7 +489,6 @@ class CCFRemote(object):
self.rpc_port = rpc_port
self.pem = "{}.pem".format(local_node_id)
self.quote = None
self.node_status = node_status
# Only expect a quote if the enclave is not virtual and quotes have
# not been explictly ignored
if enclave_type != "virtual" and not ignore_quote:
@ -559,7 +549,7 @@ class CCFRemote(object):
if self.quote:
cmd += [f"--quote-file={self.quote}"]
if start_type == StartType.start:
if start_type == StartType.new:
cmd += [
"start",
"--network-cert-file=networkcert.pem",
@ -616,32 +606,9 @@ class CCFRemote(object):
def start(self):
self.remote.start()
self.remote.get(self.pem)
if self.start_type in {StartType.start, StartType.recover}:
if self.start_type in {StartType.new, StartType.recover}:
self.remote.get("networkcert.pem")
def restart(self):
self.remote.restart()
def info(self):
self.remote.get(self.pem)
quote_bytes = []
if self.quote:
self.remote.get(self.quote)
quote_bytes = infra.path.quote_bytes(self.quote)
return {
"host": self.host,
"nodeport": str(self.node_port),
"pubhost": self.pubhost,
"rpcport": str(self.rpc_port),
"cert": infra.path.cert_bytes(self.pem),
"quote": quote_bytes,
"status": NodeStatus[self.node_status].value,
}
def node_cmd(self):
return self.remote._cmd()
def debug_node_cmd(self):
return self.remote._dbg()
@ -710,6 +677,6 @@ class NodeStatus(Enum):
class StartType(Enum):
start = 0
new = 0
join = 1
recover = 2

Просмотреть файл

@ -71,12 +71,6 @@ class CCFRemoteClient(object):
def start(self):
self.remote.start()
def restart(self):
self.remote.restart()
def node_cmd(self):
return self.remote._cmd()
def debug_node_cmd(self):
return self.remote._dbg()