Python infra: run CCF nodes inside containers (#2900)

Julien Maffre 2021-09-20 17:45:38 +01:00 committed by GitHub
Parent 767049077e
Commit 113e5ea037
No key found matching this signature
GPG key ID: 4AEE18F83AFDEB23
27 changed files with 461 additions and 187 deletions

View file

@ -7,7 +7,7 @@ trigger:
jobs:
- job: build_and_publish_docs
container: ccfciteam/ccf-ci:oe0.17.2
container: ccfciteam/ccf-ci:oe0.17.2-docker-cli
pool:
vmImage: ubuntu-20.04

View file

@ -27,11 +27,11 @@ schedules:
resources:
containers:
- container: nosgx
image: ccfciteam/ccf-ci:oe0.17.2
image: ccfciteam/ccf-ci:oe0.17.2-docker-cli
options: --publish-all --cap-add NET_ADMIN --cap-add NET_RAW --cap-add SYS_PTRACE -v /dev/shm:/tmp/ccache -v /lib/modules:/lib/modules:ro
- container: sgx
image: ccfciteam/ccf-ci:oe0.17.2
image: ccfciteam/ccf-ci:oe0.17.2-docker-cli
options: --publish-all --cap-add NET_ADMIN --cap-add NET_RAW --device /dev/sgx:/dev/sgx -v /dev/shm:/tmp/ccache -v /lib/modules:/lib/modules:ro
variables:

View file

@ -26,6 +26,7 @@ with section("parse"):
"CONSENSUS": "*",
"CONFIGURATIONS": "*",
"ADDITIONAL_ARGS": "*",
"CONTAINER_NODES": "*",
},
},
"add_e2e_sandbox_test": {

View file

@ -23,11 +23,11 @@ schedules:
resources:
containers:
- container: nosgx
image: ccfciteam/ccf-ci:oe0.17.2
image: ccfciteam/ccf-ci:oe0.17.2-docker-cli
options: --publish-all --cap-add NET_ADMIN --cap-add NET_RAW --cap-add SYS_PTRACE -v /dev/shm:/tmp/ccache
- container: sgx
image: ccfciteam/ccf-ci:oe0.17.2
image: ccfciteam/ccf-ci:oe0.17.2-docker-cli
options: --publish-all --cap-add NET_ADMIN --cap-add NET_RAW --device /dev/sgx:/dev/sgx -v /dev/shm:/tmp/ccache
jobs:

.github/workflows/ci-checks.yml (vendored, 2 changed lines)
View file

@ -9,7 +9,7 @@ on:
jobs:
checks:
runs-on: ubuntu-20.04
container: ccfciteam/ccf-ci:oe0.17.2
container: ccfciteam/ccf-ci:oe0.17.2-docker-cli
steps:
- name: Checkout repository

View file

@ -16,7 +16,7 @@ pr:
resources:
containers:
- container: sgx
image: ccfciteam/ccf-ci:oe0.17.2
image: ccfciteam/ccf-ci:oe0.17.2-docker-cli
options: --publish-all --cap-add NET_ADMIN --cap-add NET_RAW --device /dev/sgx:/dev/sgx -v /dev/shm:/tmp/ccache
jobs:

View file

@ -21,7 +21,7 @@ schedules:
resources:
containers:
- container: sgx
image: ccfciteam/ccf-ci:oe0.17.2
image: ccfciteam/ccf-ci:oe0.17.2-docker-cli
options: --publish-all --cap-add NET_ADMIN --cap-add NET_RAW --device /dev/sgx:/dev/sgx -v /dev/shm:/tmp/ccache
jobs:

View file

@ -619,6 +619,7 @@ if(BUILD_TESTS)
NAME e2e_logging_cft
PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/e2e_logging.py
CONSENSUS cft
CONTAINER_NODES TRUE
ADDITIONAL_ARGS --js-app-bundle ${CMAKE_SOURCE_DIR}/samples/apps/logging/js
)
@ -725,10 +726,6 @@ if(BUILD_TESTS)
ADDITIONAL_ARGS -p "samples/apps/logging/liblogging" --duration 45m
)
# add_e2e_test( NAME reconfiguration_test_${CONSENSUS} PYTHON_SCRIPT
# ${CMAKE_SOURCE_DIR}/tests/reconfiguration.py CONSENSUS ${CONSENSUS}
# ADDITIONAL_ARGS --ccf-version ${CCF_VERSION} )
endforeach()
add_perf_test(

View file

@ -122,7 +122,7 @@ endforeach()
# Copy utilities from tests directory
set(CCF_TEST_UTILITIES tests.sh cimetrics_env.sh upload_pico_metrics.py
test_install.sh test_python_cli.sh
test_install.sh test_python_cli.sh docker_wrap.sh
)
foreach(UTILITY ${CCF_TEST_UTILITIES})
configure_file(
@ -458,7 +458,7 @@ function(add_e2e_test)
cmake_parse_arguments(
PARSE_ARGV 0 PARSED_ARGS ""
"NAME;PYTHON_SCRIPT;LABEL;CURL_CLIENT;CONSENSUS;"
"CONSTITUTION;ADDITIONAL_ARGS;CONFIGURATIONS"
"CONSTITUTION;ADDITIONAL_ARGS;CONFIGURATIONS;CONTAINER_NODES"
)
if(NOT PARSED_ARGS_CONSTITUTION)
@ -531,6 +531,14 @@ function(add_e2e_test)
PROPERTY ENVIRONMENT "CURL_CLIENT=ON"
)
endif()
if((${PARSED_ARGS_CONTAINER_NODES}) AND (LONG_TESTS))
# Containerised nodes are only enabled with long tests
set_property(
TEST ${PARSED_ARGS_NAME}
APPEND
PROPERTY ENVIRONMENT "CONTAINER_NODES=ON"
)
endif()
set_property(
TEST ${PARSED_ARGS_NAME}
APPEND
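The property above only takes effect when LONG_TESTS is enabled; at runtime the Python infra keys off the CONTAINER_NODES environment variable (see the os.getenv("CONTAINER_NODES") check in tests/infra/node.py further down in this diff), so containerised nodes can also be enabled by hand. A minimal sketch, assuming the usual build-directory layout and a package name taken from elsewhere in this diff:

# Any non-empty value of CONTAINER_NODES selects DockerShim in tests/infra/node.py.
# The paths and the package name below are assumptions for illustration only.
cd build
CONTAINER_NODES=ON python ../tests/e2e_logging.py -p samples/apps/logging/liblogging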

View file

@ -18,6 +18,9 @@
- import_role:
name: vegeta
tasks_from: install.yml
- import_role:
name: docker
tasks_from: install.yml
- import_role:
name: ccf_run
tasks_from: install.yml

View file

@ -25,6 +25,7 @@ debs:
- iptables # partition test infra
- libclang1-9 # required by doxygen
- libclang-cpp9 # required by doxygen
- docker-ce-cli
mbedtls_ver: "2.16.10"
mbedtls_dir: "mbedtls-{{ mbedtls_ver }}"

View file

@ -0,0 +1,12 @@
- name: Add docker-ce repository key
apt_key:
url: "https://download.docker.com/linux/ubuntu/gpg"
state: present
become: yes
- name: Add docker repository
apt_repository:
repo: "deb https://download.docker.com/linux/ubuntu {{ ansible_distribution_release }} stable"
state: present
update_cache: yes
become: yes

View file

@ -80,7 +80,7 @@ def run(args):
initial_metrics = get_session_metrics(primary)
assert initial_metrics["active"] <= initial_metrics["peak"], initial_metrics
main_session_metrics = initial_metrics["interfaces"][
f"{primary.host}:{primary.rpc_port}"
f"{primary.rpc_host}:{primary.rpc_port}"
]
assert (
main_session_metrics["soft_cap"] == args.max_open_sessions

tests/docker_wrap.sh (new executable file, 37 lines)
View file

@ -0,0 +1,37 @@
#!/bin/bash
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the Apache 2.0 License.
# The purpose of this script is to wrap the launch of a CCF node
# when the IP address of the node isn't known in advance
# (e.g. dynamically launched container).
# This sets the --node-address and --public-rpc-address arguments
# based on the IP address of the container (it assumes that the container
# is connected to a service-specific network)
# Note: This should become less hacky once https://github.com/microsoft/CCF/issues/2612 is implemented
set -e
cmd=$*
container_ip=$(hostname -i | cut -d " " -f 2) # Network container IP address
addresses="--node-address=${container_ip}:0 --public-rpc-address=${container_ip}:0"
# Required for 1.x releases
addresses="${addresses} --san=iPAddress:${container_ip}"
startup_cmd=""
for c in " start " " join" " recover "; do
if [[ $cmd == *"${c}"* ]]; then
startup_cmd=${c}
fi
done
if [ -z "${startup_cmd}" ]; then
echo "Command does not container valid cchost startup command"
exit 1
fi
# Insert node and public RPC address in command line (yikes!)
cmd="${cmd%%${startup_cmd}*} ${addresses} ${startup_cmd} ${cmd##*${startup_cmd}}"
eval "${cmd}"

View file

@ -72,9 +72,9 @@ def test_illegal(network, args, verify=True):
)
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
conn = context.wrap_socket(
sock, server_side=False, server_hostname=primary.host
sock, server_side=False, server_hostname=primary.get_public_rpc_host()
)
conn.connect((primary.host, primary.pubport))
conn.connect((primary.get_public_rpc_host(), primary.pubport))
LOG.info(f"Sending: {content}")
conn.sendall(content)
response = HTTPResponse(conn)
@ -1072,7 +1072,7 @@ def test_primary(network, args):
assert r.status_code == http.HTTPStatus.PERMANENT_REDIRECT.value
assert (
r.headers["location"]
== f"https://{primary.pubhost}:{primary.pubport}/node/primary"
== f"https://{primary.get_public_rpc_host()}:{primary.pubport}/node/primary"
)
return network
@ -1091,7 +1091,7 @@ def test_network_node_info(network, args):
nodes_by_id = {node["node_id"]: node for node in nodes}
for n in all_nodes:
node = nodes_by_id[n.node_id]
assert node["host"] == n.pubhost
assert node["host"] == n.get_public_rpc_host()
assert node["port"] == str(n.pubport)
assert node["primary"] == (n == primary)
del nodes_by_id[n.node_id]
@ -1107,7 +1107,7 @@ def test_network_node_info(network, args):
assert r.status_code == http.HTTPStatus.PERMANENT_REDIRECT.value
assert (
r.headers["location"]
== f"https://{node.pubhost}:{node.pubport}/node/network/nodes/{node.node_id}"
== f"https://{node.get_public_rpc_host()}:{node.pubport}/node/network/nodes/{node.node_id}"
), r.headers["location"]
# Following that redirect gets you the node info
@ -1115,7 +1115,7 @@ def test_network_node_info(network, args):
assert r.status_code == http.HTTPStatus.OK.value
body = r.body.json()
assert body["node_id"] == node.node_id
assert body["host"] == node.pubhost
assert body["host"] == node.get_public_rpc_host()
assert body["port"] == str(node.pubport)
assert body["primary"] == (node == primary)
@ -1129,7 +1129,7 @@ def test_network_node_info(network, args):
assert r.status_code == http.HTTPStatus.PERMANENT_REDIRECT.value
assert (
r.headers["location"]
== f"https://{primary.pubhost}:{primary.pubport}/node/primary"
== f"https://{primary.get_public_rpc_host()}:{primary.pubport}/node/primary"
), r.headers["location"]
r = c.head("/node/primary", allow_redirects=True)
@ -1139,7 +1139,7 @@ def test_network_node_info(network, args):
r = c.get("/node/network/nodes/primary", allow_redirects=False)
assert r.status_code == http.HTTPStatus.PERMANENT_REDIRECT.value
actual = r.headers["location"]
expected = f"https://{node.pubhost}:{node.pubport}/node/network/nodes/{primary.node_id}"
expected = f"https://{node.get_public_rpc_host()}:{node.pubport}/node/network/nodes/{primary.node_id}"
assert actual == expected, f"{actual} != {expected}"
# Following that redirect gets you the primary's node info
@ -1305,11 +1305,7 @@ def run(args):
) as network:
network.start_and_join(args)
network = test(
network,
args,
verify=args.package != "libjs_generic",
)
network = test(network, args, verify=args.package != "libjs_generic")
network = test_illegal(network, args, verify=args.package != "libjs_generic")
network = test_large_messages(network, args)
network = test_remove(network, args)

View file

@ -94,7 +94,9 @@ def run_tls_san_checks(args):
args.san = None
new_node = network.create_node(f"local://localhost:0,{dummy_public_rpc_host}")
network.join_node(new_node, args.package, args)
sans = infra.crypto.get_san_from_pem_cert(new_node.get_tls_certificate_pem())
sans = infra.crypto.get_san_from_pem_cert(
new_node.get_tls_certificate_pem(use_public_rpc_host=False)
)
assert len(sans) == 1, "Expected exactly one SAN"
assert sans[0].value == ipaddress.ip_address(dummy_public_rpc_host)

View file

@ -51,13 +51,18 @@ def strip_release_branch_name(branch_name):
return branch_name[len(BRANCH_RELEASE_PREFIX) :]
def strip_release_tag_name(tag_name):
assert is_release_tag(tag_name), tag_name
return tag_name[len(TAG_RELEASE_PREFIX) :]
def get_major_version_from_release_branch_name(full_branch_name):
return int(strip_release_branch_name(full_branch_name).split(".")[0])
def get_version_from_tag_name(tag_name):
assert is_release_tag(tag_name), tag_name
return Version(tag_name[len(TAG_RELEASE_PREFIX) :])
return Version(strip_release_tag_name(tag_name))
def get_release_branch_from_branch_name(branch_name):
@ -107,6 +112,9 @@ class Repository:
if "heads/release" in branch
]
def get_latest_dev_tag(self):
return self.tags[-1]
def get_release_branches_names(self):
# Branches are ordered based on major version, with oldest first
return sorted(
@ -175,7 +183,7 @@ class Repository:
return releases
def install_release(self, tag):
stripped_tag = tag[len(TAG_RELEASE_PREFIX) :]
stripped_tag = strip_release_tag_name(tag)
install_directory = f"{INSTALL_DIRECTORY_PREFIX}{stripped_tag}"
debian_package_url = get_debian_package_url_from_tag_name(tag)

View file

@ -248,7 +248,7 @@ class Network:
workspace=args.workspace,
label=args.label,
common_dir=self.common_dir,
target_rpc_address=f"{target_node.host}:{target_node.rpc_port}",
target_rpc_address=f"{target_node.get_public_rpc_host()}:{target_node.rpc_port}",
snapshot_dir=snapshot_dir,
ledger_dir=current_ledger_dir,
read_only_ledger_dir=committed_ledger_dir,
@ -588,21 +588,9 @@ class Network:
raise NodeShutdownError("Fatal error found during node shutdown")
def join_node(
self,
node,
lib_name,
args,
target_node=None,
timeout=JOIN_TIMEOUT,
**kwargs,
self, node, lib_name, args, target_node=None, timeout=JOIN_TIMEOUT, **kwargs
):
self._add_node(
node,
lib_name,
args,
target_node,
**kwargs,
)
self._add_node(node, lib_name, args, target_node, **kwargs)
primary, _ = self.find_primary()
try:
@ -669,10 +657,7 @@ class Network:
os.path.join(self.common_dir, f"{local_user_id}_cert.pem"), encoding="utf-8"
) as c:
service_user_id = infra.crypto.compute_cert_der_hash_hex_from_pem(c.read())
new_user = UserInfo(
local_user_id,
service_user_id,
)
new_user = UserInfo(local_user_id, service_user_id)
if record:
self.users.append(new_user)
@ -895,10 +880,7 @@ class Network:
with backup.client("user0") as c:
_ = c.post(
"/app/log/private",
{
"id": -1,
"msg": "This is submitted to force a view change",
},
{"id": -1, "msg": "This is submitted to force a view change"},
)
time.sleep(1)
except CCFConnectionException:

View file

@ -5,6 +5,7 @@ from contextlib import contextmanager, closing
from enum import Enum, auto
import infra.crypto
import infra.remote
import infra.remote_shim
import infra.net
import infra.path
import ccf.clients
@ -15,6 +16,9 @@ import re
import ipaddress
import ssl
# pylint: disable=import-error, no-name-in-module
from setuptools.extern.packaging.version import Version # type: ignore
from loguru import logger as LOG
BASE_NODE_CLIENT_HOST = "127.100.0.0"
@ -66,6 +70,11 @@ def get_snapshot_seqnos(file_name):
return int(seqnos[0]), int(seqnos[1])
def strip_version(full_version):
dash_offset = 1 if full_version.startswith("ccf-") else 0
return full_version.split("-")[dash_offset]
class Node:
# Default to using httpx
curl = False
@ -94,35 +103,49 @@ class Node:
self.node_client_host = None
self.interfaces = []
self.version = version
self.major_version = (
Version(strip_version(self.version)).release[0]
if self.version is not None
else None
)
self.consensus = None
if os.getenv("CONTAINER_NODES"):
self.remote_shim = infra.remote_shim.DockerShim
else:
self.remote_shim = infra.remote_shim.PassThroughShim
if isinstance(host, str):
host = infra.e2e_args.HostSpec.from_str(host)
if host.protocol == "local":
self.remote_impl = infra.remote.LocalRemote
if not version or version > 1:
self.node_client_host = str(
ipaddress.ip_address(BASE_NODE_CLIENT_HOST) + self.local_node_id
)
# Node client address does not currently work with DockerShim
if self.remote_shim != infra.remote_shim.DockerShim:
if not self.major_version or self.major_version > 1:
self.node_client_host = str(
ipaddress.ip_address(BASE_NODE_CLIENT_HOST) + self.local_node_id
)
elif host.protocol == "ssh":
self.remote_impl = infra.remote.SSHRemote
else:
assert False, f"{host} does not start with 'local://' or 'ssh://'"
host_ = host.rpchost
self.host, *port = host_.split(":")
self.rpc_host, *port = host_.split(":")
self.rpc_port = int(port[0]) if port else None
if self.host == "localhost":
self.host = infra.net.expand_localhost()
if self.rpc_host == "localhost":
self.rpc_host = infra.net.expand_localhost()
pubhost_ = host.public_rpchost
if pubhost_:
self.pubhost, *pubport = pubhost_.split(":")
self.pubport = int(pubport[0]) if pubport else self.rpc_port
else:
self.pubhost = self.host
self.pubhost = self.rpc_host
self.pubport = self.rpc_port
self.node_host = self.rpc_host
self.node_port = node_port
self.max_open_sessions = host.max_open_sessions
@ -223,25 +246,28 @@ class Node:
if self.max_open_sessions_hard:
kwargs["max_open_sessions_hard"] = self.max_open_sessions_hard
self.common_dir = common_dir
self.remote = infra.remote.CCFRemote(
self.remote = self.remote_shim(
start_type,
lib_path,
self.local_node_id,
self.host,
self.pubhost,
self.node_port,
self.rpc_port,
self.node_client_host,
self.remote_impl,
enclave_type,
self.remote_impl,
workspace,
label,
common_dir,
label=label,
local_node_id=self.local_node_id,
rpc_host=self.rpc_host,
node_host=self.node_host,
pub_host=self.pubhost,
node_port=self.node_port,
rpc_port=self.rpc_port,
node_client_host=self.node_client_host,
target_rpc_address=target_rpc_address,
members_info=members_info,
snapshot_dir=snapshot_dir,
binary_dir=self.binary_dir,
additional_raw_node_args=self.additional_raw_node_args,
version=self.version,
**kwargs,
)
self.remote.setup()
@ -256,7 +282,7 @@ class Node:
print("")
print(
"================= Please run the below command on "
+ self.host
+ self.rpc_host
+ " and press enter to continue ================="
)
print("")
@ -267,7 +293,14 @@ class Node:
if self.perf:
self.remote.set_perf()
self.remote.start()
self.remote.get_startup_files(self.common_dir)
try:
self.remote.get_startup_files(self.common_dir)
except Exception as e:
LOG.exception(e)
self.remote.get_logs(tail_lines_len=None)
raise
self.consensus = kwargs.get("consensus")
with open(
@ -285,9 +318,10 @@ class Node:
with open(node_address_path, "r", encoding="utf-8") as f:
node_host, node_port = f.read().splitlines()
node_port = int(node_port)
assert (
node_host == self.host
), f"Unexpected change in node address from {self.host} to {node_host}"
if self.remote_shim != infra.remote_shim.DockerShim:
assert (
node_host == self.node_host
), f"Unexpected change in node address from {self.node_host} to {node_host}"
if self.node_port is None and self.node_port != 0:
self.node_port = node_port
assert (
@ -302,9 +336,10 @@ class Node:
for i, (rpc_host, rpc_port) in enumerate(zip(*it)):
rpc_port = int(rpc_port)
if i == 0:
assert (
rpc_host == self.host
), f"Unexpected change in RPC address from {self.host} to {rpc_host}"
if self.remote_shim != infra.remote_shim.DockerShim:
assert (
rpc_host == self.rpc_host
), f"Unexpected change in RPC address from {self.rpc_host} to {rpc_host}"
if self.rpc_port is not None and self.rpc_port != 0:
assert (
rpc_port == self.rpc_port
@ -412,9 +447,10 @@ class Node:
}
def signing_auth(self, name=None):
return {
"signing_auth": self.identity(name),
}
return {"signing_auth": self.identity(name)}
def get_public_rpc_host(self):
return self.remote.get_host()
def client(
self, identity=None, signing_identity=None, interface_idx=None, **kwargs
@ -434,7 +470,9 @@ class Node:
akwargs["curl"] = True
if interface_idx is None:
return ccf.clients.client(self.pubhost, self.pubport, **akwargs)
return ccf.clients.client(
self.get_public_rpc_host(), self.pubport, **akwargs
)
else:
try:
host, port = self.interfaces[interface_idx]
@ -445,8 +483,13 @@ class Node:
raise
return ccf.clients.client(host, port, **akwargs)
def get_tls_certificate_pem(self):
return ssl.get_server_certificate((self.host, self.rpc_port))
def get_tls_certificate_pem(self, use_public_rpc_host=True):
return ssl.get_server_certificate(
(
self.get_public_rpc_host() if use_public_rpc_host else self.rpc_host,
self.rpc_port,
)
)
def suspend(self):
assert not self.suspended

View file

@ -122,7 +122,7 @@ class Partitioner:
# Isolates node server socket
server_rule = {
**base_rule,
"dst": node.host,
"dst": node.node_host,
"tcp": {"dport": str(node.node_port)},
}
@ -137,7 +137,7 @@ class Partitioner:
# If there is one, only isolate from specific node
if other:
server_rule["src"] = other.node_client_host
client_rule["dst"] = other.host
client_rule["dst"] = other.node_host
name += f" from node {other.local_node_id}"
if iptc.easy.has_rule("filter", CCF_IPTABLES_CHAIN, server_rule):

View file

@ -47,11 +47,14 @@ def sftp_session(hostname):
client.close()
def log_errors(out_path, err_path):
DEFAULT_TAIL_LINES_LEN = 10
def log_errors(out_path, err_path, tail_lines_len=DEFAULT_TAIL_LINES_LEN):
error_filter = ["[fail ]", "[fatal]"]
error_lines = []
try:
tail_lines = deque(maxlen=10)
tail_lines = deque(maxlen=tail_lines_len)
with open(out_path, "r", errors="replace", encoding="utf-8") as lines:
for line in lines:
stripped_line = line.rstrip()
@ -121,10 +124,8 @@ class SSHRemote(CmdMixin):
data_files,
cmd,
workspace,
label,
common_dir,
env=None,
log_format_json=None,
):
"""
Runs a command on a remote host, through an SSH connection. A temporary
@ -132,7 +133,7 @@ class SSHRemote(CmdMixin):
run out of that directory.
Note that the name matters, since the temporary directory that will be first
deleted, then created and populated is workspace/label_name. There is deliberately no
deleted, then created and populated is workspace/name. There is deliberately no
cleanup on shutdown, to make debugging/inspection possible.
setup() connects, creates the directory and ships over the files
@ -149,7 +150,7 @@ class SSHRemote(CmdMixin):
self.client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
self.proc_client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
self.common_dir = common_dir
self.root = os.path.join(workspace, f"{label}_{name}")
self.root = os.path.join(workspace, name)
self.name = name
self.env = env or {}
self.out = os.path.join(self.root, "out")
@ -228,8 +229,7 @@ class SSHRemote(CmdMixin):
)
for f in session.listdir(src_dir):
session.get(
os.path.join(src_dir, f),
os.path.join(dst_dir, f),
os.path.join(src_dir, f), os.path.join(dst_dir, f)
)
else:
session.get(
@ -263,14 +263,12 @@ class SSHRemote(CmdMixin):
raise ValueError(self.root)
return files
def get_logs(self):
def get_logs(self, tail_lines_len=DEFAULT_TAIL_LINES_LEN):
with sftp_session(self.hostname) as session:
for filepath in (self.err, self.out):
try:
local_file_name = "{}_{}_{}".format(
self.hostname,
self.name,
os.path.basename(filepath),
self.hostname, self.name, os.path.basename(filepath)
)
dst_path = os.path.join(self.common_dir, local_file_name)
session.get(filepath, dst_path)
@ -281,6 +279,11 @@ class SSHRemote(CmdMixin):
filepath, dst_path, self.hostname
)
)
return log_errors(
os.path.join(self.common_dir, "{}_{}_out".format(self.hostname, self.name)),
os.path.join(self.common_dir, "{}_{}_err".format(self.hostname, self.name)),
tail_lines_len=tail_lines_len,
)
def start(self):
"""
@ -325,11 +328,10 @@ class SSHRemote(CmdMixin):
Disconnect the client, and therefore shut down the command as well.
"""
LOG.info("[{}] closing".format(self.hostname))
self.get_logs()
errors, fatal_errors = log_errors(
os.path.join(self.common_dir, "{}_{}_out".format(self.hostname, self.name)),
os.path.join(self.common_dir, "{}_{}_err".format(self.hostname, self.name)),
)
(
errors,
fatal_errors,
) = self.get_logs()
self.client.close()
self.proc_client.close()
return errors, fatal_errors
@ -386,10 +388,8 @@ class LocalRemote(CmdMixin):
data_files,
cmd,
workspace,
label,
common_dir,
env=None,
log_format_json=None,
):
"""
Local Equivalent to the SSHRemote
@ -398,7 +398,7 @@ class LocalRemote(CmdMixin):
self.exe_files = exe_files
self.data_files = data_files
self.cmd = cmd
self.root = os.path.join(workspace, f"{label}_{name}")
self.root = os.path.join(workspace, name)
self.common_dir = common_dir
self.proc = None
self.stdout = None
@ -412,7 +412,7 @@ class LocalRemote(CmdMixin):
LOG.info("[{}] {}".format(self.hostname, cmd))
return subprocess.call(cmd, shell=True)
def _cp(self, src_path, dst_path):
def cp(self, src_path, dst_path):
if os.path.isdir(src_path):
assert self._rc("rm -rf {}".format(os.path.join(dst_path))) == 0
assert self._rc("cp -r {} {}".format(src_path, dst_path)) == 0
@ -428,7 +428,7 @@ class LocalRemote(CmdMixin):
assert self._rc("ln -s {} {}".format(src_path, dst_path)) == 0
for path in self.data_files:
dst_path = os.path.join(self.root, os.path.basename(path))
self._cp(path, dst_path)
self.cp(path, dst_path)
def get(
self,
@ -449,7 +449,7 @@ class LocalRemote(CmdMixin):
if not pre_condition_func(path, os.listdir):
raise RuntimeError("Pre-condition for getting remote files failed")
target_name = target_name or os.path.basename(src_path)
self._cp(path, os.path.join(dst_path, target_name))
self.cp(path, os.path.join(dst_path, target_name))
def list_files(self):
return os.listdir(self.root)
@ -476,6 +476,9 @@ class LocalRemote(CmdMixin):
def resume(self):
self.proc.send_signal(signal.SIGCONT)
def get_logs(self, tail_lines_len=DEFAULT_TAIL_LINES_LEN):
return log_errors(self.out, self.err, tail_lines_len=tail_lines_len)
def stop(self):
"""
Disconnect the client, and therefore shut down the command as well.
@ -488,7 +491,7 @@ class LocalRemote(CmdMixin):
self.stdout.close()
if self.stderr:
self.stderr.close()
return log_errors(self.out, self.err)
return self.get_logs()
def setup(self):
"""
@ -497,9 +500,10 @@ class LocalRemote(CmdMixin):
"""
self._setup_files()
def get_cmd(self):
cmd = " ".join(self.cmd)
return f"cd {self.root} && {cmd} 1> {self.out} 2> {self.err}"
def get_cmd(self, include_dir=True):
cmd = f"cd {self.root} && " if include_dir else ""
cmd += f'{" ".join(self.cmd)} 1> {self.out} 2> {self.err}'
return cmd
def debug_node_cmd(self):
cmd = " ".join(self.cmd)
@ -525,10 +529,7 @@ CCF_TO_OE_LOG_LEVEL = {
def make_address(host, port=None):
if port is not None:
return f"{host}:{port}"
else:
return f"{host}:0"
return f"{host}:{port or 0}"
class CCFRemote(object):
@ -539,17 +540,18 @@ class CCFRemote(object):
self,
start_type,
lib_path,
local_node_id,
host,
pubhost,
node_port,
rpc_port,
node_client_host,
remote_class,
enclave_type,
remote_class,
workspace,
label,
common_dir,
label="",
local_node_id=None,
rpc_host=None,
node_host=None,
pub_host=None,
node_port=0,
rpc_port=0,
node_client_host=None,
target_rpc_address=None,
members_info=None,
snapshot_dir=None,
@ -576,20 +578,25 @@ class CCFRemote(object):
jwt_key_refresh_interval_s=None,
curve_id=None,
client_connection_timeout_ms=None,
version=None,
include_addresses=True,
additional_raw_node_args=None,
):
"""
Run a ccf binary on a remote host.
"""
self.name = f"{label}_{local_node_id}"
self.start_type = start_type
self.local_node_id = local_node_id
self.pem = f"{local_node_id}.pem"
self.node_address_path = f"{local_node_id}.node_address"
self.rpc_address_path = f"{local_node_id}.rpc_address"
self.binary_dir = binary_dir
self.BIN = infra.path.build_bin_path(
self.BIN, enclave_type, binary_dir=binary_dir
)
self.common_dir = common_dir
self.pub_host = pub_host
self.ledger_dir = os.path.normpath(ledger_dir) if ledger_dir else None
self.ledger_dir_name = (
@ -633,11 +640,9 @@ class CCFRemote(object):
bin_path,
f"--enclave-file={enclave_path}",
f"--enclave-type={enclave_type}",
f"--node-address={make_address(host, node_port)}",
f"--node-address-file={self.node_address_path}",
f"--rpc-address={make_address(host, rpc_port)}",
f"--rpc-address={make_address(rpc_host, rpc_port)}",
f"--rpc-address-file={self.rpc_address_path}",
f"--public-rpc-address={make_address(pubhost, rpc_port)}",
f"--ledger-dir={self.ledger_dir_name}",
f"--snapshot-dir={self.snapshot_dir_name}",
f"--node-cert-file={self.pem}",
@ -647,6 +652,12 @@ class CCFRemote(object):
f"--worker-threads={worker_threads}",
]
if include_addresses:
cmd += [
f"--node-address={make_address(node_host, node_port)}",
f"--public-rpc-address={make_address(pub_host, rpc_port)}",
]
if node_client_host:
cmd += [f"--node-client-interface={node_client_host}"]
@ -698,10 +709,7 @@ class CCFRemote(object):
cmd += [str(s)]
if start_type == StartType.new:
cmd += [
"start",
"--network-cert-file=networkcert.pem",
]
cmd += ["start", "--network-cert-file=networkcert.pem"]
for fragment in constitution:
cmd.append(f"--constitution={os.path.basename(fragment)}")
data_files += [
@ -750,16 +758,7 @@ class CCFRemote(object):
env["OE_LOG_LEVEL"] = oe_log_level
self.remote = remote_class(
local_node_id,
host,
exe_files,
data_files,
cmd,
workspace,
label,
common_dir,
env,
log_format_json,
self.name, rpc_host, exe_files, data_files, cmd, workspace, common_dir, env
)
def setup(self):
@ -803,9 +802,7 @@ class CCFRemote(object):
# suite, this argument will probably default to True (or be deleted entirely)
def get_ledger(self, ledger_dir_name, include_read_only_dirs=False):
self.remote.get(
self.ledger_dir_name,
self.common_dir,
target_name=ledger_dir_name,
self.ledger_dir_name, self.common_dir, target_name=ledger_dir_name
)
read_only_ledger_dirs = []
if include_read_only_dirs and self.read_only_ledger_dir is not None:
@ -822,10 +819,7 @@ class CCFRemote(object):
read_only_ledger_dirs.append(
os.path.join(self.common_dir, read_only_ledger_dir_name)
)
return (
os.path.join(self.common_dir, ledger_dir_name),
read_only_ledger_dirs,
)
return (os.path.join(self.common_dir, ledger_dir_name), read_only_ledger_dirs)
def get_snapshots(self):
self.remote.get(self.snapshot_dir_name, self.common_dir)
@ -848,6 +842,12 @@ class CCFRemote(object):
paths += [os.path.join(self.remote.root, self.read_only_ledger_dir_name)]
return paths
def get_logs(self, tail_lines_len=DEFAULT_TAIL_LINES_LEN):
return self.remote.get_logs(tail_lines_len=tail_lines_len)
def get_host(self):
return self.pub_host
class StartType(Enum):
new = auto()

View file

@ -58,14 +58,7 @@ class CCFRemoteClient(object):
] + client_command_args
self.remote = remote_class(
name,
host,
[self.BIN],
self.DEPS,
cmd,
workspace,
label,
self.common_dir,
name, host, [self.BIN], self.DEPS, cmd, workspace, self.common_dir
)
def setup(self):

tests/infra/remote_shim.py (new file, 197 lines)
View file

@ -0,0 +1,197 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the Apache 2.0 License.
import infra.remote
import docker
import re
import os
import pathlib
import grp
import infra.github
from loguru import logger as LOG
def is_docker_env():
"""Returns true if the process executing _this_ code already runs inside Docker"""
return os.path.isfile("/.dockerenv")
def is_azure_devops_env():
return "SYSTEM_TEAMFOUNDATIONCOLLECTIONURI" in os.environ
def map_azure_devops_docker_workspace_dir(workspace_dir):
return workspace_dir.replace("__w", "/mnt/vss/_work")
# Docker image name prefix
DOCKER_IMAGE_NAME_PREFIX = "ccfciteam/ccf-app-run"
# Network name
AZURE_DEVOPS_CONTAINER_NETWORK_ENV_VAR = "AGENT_CONTAINERNETWORK"
DOCKER_NETWORK_NAME_LOCAL = "ccf_test_docker_network"
# Identifier for all CCF test containers
CCF_TEST_CONTAINERS_LABEL = "ccf_test"
NODE_STARTUP_WRAPPER_SCRIPT = "docker_wrap.sh"
def kernel_has_sgx_builtin():
with open("/proc/cpuinfo", "r", encoding="utf-8") as cpu_info:
f = re.compile("^flags.*sgx.*")
for line in cpu_info:
if f.match(line):
return True
return False
class PassThroughShim(infra.remote.CCFRemote):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
# Current limitations, which should be possible to overcome:
# No support for kernel built-in SGX (i.e. 5.11+ kernels) in a Docker environment (e.g. Docker CI):
# file permission issues, and cannot connect to the Docker daemon
class DockerShim(infra.remote.CCFRemote):
def _stop_container(self, container):
try:
container.stop()
container.remove()
LOG.info(f"Stopped container {container.name}")
except docker.errors.NotFound:
pass
def __init__(self, *args, **kwargs):
self.docker_client = docker.DockerClient()
self.container_ip = None # Assigned when container is started
label = kwargs.get("label")
local_node_id = kwargs.get("local_node_id")
ccf_version = kwargs.get("version")
# Sanitise container name, replacing illegal characters with underscores
self.container_name = f"{label}_{local_node_id}"
self.container_name = re.sub(r"[^a-zA-Z0-9_.-]", "_", self.container_name)
# Create network to connect all containers to (for n2n communication, etc.).
# In a Docker environment, use existing network (either the one provided by
# ADO or the one already created by the runner).
# Otherwise, create network on the fly.
if is_docker_env():
self.network = self.docker_client.networks.get(
os.environ[AZURE_DEVOPS_CONTAINER_NETWORK_ENV_VAR]
if is_azure_devops_env()
else DOCKER_NETWORK_NAME_LOCAL
)
else:
try:
self.network = self.docker_client.networks.get(
DOCKER_NETWORK_NAME_LOCAL
)
except docker.errors.NotFound:
LOG.debug(f"Creating network {DOCKER_NETWORK_NAME_LOCAL}")
self.network = self.docker_client.networks.create(
DOCKER_NETWORK_NAME_LOCAL
)
# Stop and delete existing container(s)
if local_node_id == 0:
for c in self.docker_client.containers.list(
all=True, filters={"label": [CCF_TEST_CONTAINERS_LABEL, label]}
):
self._stop_container(c)
LOG.debug(
f'Network {self.network.name} [{self.network.attrs["IPAM"]["Config"][0]["Gateway"]}]'
)
# Group and device for kernel sgx builtin support (or not)
if kernel_has_sgx_builtin():
gid = grp.getgrnam("sgx_prv").gr_gid
devices = (
["/dev/sgx/enclave", "/dev/sgx/provision"]
if os.path.isdir("/dev/sgx")
else None
)
else:
gid = os.getgid()
devices = ["/dev/sgx"] if os.path.isdir("/dev/sgx") else None
# Mount workspace volume
cwd = str(pathlib.Path().resolve())
cwd_host = (
map_azure_devops_docker_workspace_dir(cwd) if is_azure_devops_env() else cwd
)
# Deduce container tag from node version
repo = infra.github.Repository()
image_name = f"{DOCKER_IMAGE_NAME_PREFIX}:"
if ccf_version is not None:
image_name += ccf_version
else:
image_name += infra.github.strip_release_tag_name(repo.get_latest_dev_tag())
try:
self.docker_client.images.get(image_name)
except docker.errors.ImageNotFound:
LOG.info(f"Pulling image {image_name}")
self.docker_client.images.pull(image_name)
# Bind local RPC address to 0.0.0.0, so that it can be accessed from outside the container
kwargs["rpc_host"] = "0.0.0.0"
kwargs["include_addresses"] = False
super().__init__(*args, **kwargs)
self.command = f'./{NODE_STARTUP_WRAPPER_SCRIPT} "{self.remote.get_cmd(include_dir=False)}"'
self.container = self.docker_client.containers.create(
image_name,
volumes={cwd_host: {"bind": cwd, "mode": "rw"}},
devices=devices,
command=self.command,
name=self.container_name,
init=True,
labels=[label, CCF_TEST_CONTAINERS_LABEL],
publish_all_ports=True,
user=f"{os.getuid()}:{gid}",
working_dir=self.remote.root,
detach=True,
auto_remove=True,
)
self.network.connect(self.container)
LOG.debug(f"Created container {self.container_name} [{image_name}]")
def setup(self):
src_path = os.path.join(".", NODE_STARTUP_WRAPPER_SCRIPT)
self.remote.setup()
self.remote.cp(src_path, self.remote.root)
def start(self):
LOG.info(self.command)
self.container.start()
self.container.reload() # attrs are cached
self.container_ip = self.container.attrs["NetworkSettings"]["Networks"][
self.network.name
]["IPAddress"]
LOG.debug(f"Started container {self.container_name} [{self.container_ip}]")
def get_host(self):
return self.container_ip
def stop(self):
try:
self.container.stop()
LOG.info(f"Stopped container {self.container.name}")
except docker.errors.NotFound:
pass
return self.remote.get_logs()
def suspend(self):
self.container.pause()
def resume(self):
self.container.unpause()
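Because every node container carries the CCF_TEST_CONTAINERS_LABEL label ("ccf_test") and joins the test network, leftovers from an interrupted run can be inspected and cleaned up with the standard Docker CLI. A hedged sketch:

# List CCF test containers, running or stopped:
docker ps -a --filter "label=ccf_test"
# Remove any that were left behind, then the local test network (ignore if absent):
docker ps -aq --filter "label=ccf_test" | xargs -r docker rm -f
docker network rm ccf_test_docker_network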

View file

@ -63,7 +63,7 @@ def configure_remote_client(args, client_id, client_host, node, command_args):
"client_" + str(client_id),
client_host,
args.client,
node.host,
node.rpc_host,
node.rpc_port,
args.workspace,
args.label,

View file

@ -14,8 +14,6 @@ import os
import json
import time
# pylint: disable=import-error, no-name-in-module
from setuptools.extern.packaging.version import Version # type: ignore
from loguru import logger as LOG
@ -65,9 +63,7 @@ def get_new_constitution_for_install(args, install_path):
return args.constitution
def test_new_service(
network, args, install_path, binary_dir, library_dir, major_version
):
def test_new_service(network, args, install_path, binary_dir, library_dir, version):
LOG.info("Update constitution")
primary, _ = network.find_primary()
new_constitution = get_new_constitution_for_install(args, install_path)
@ -80,7 +76,7 @@ def test_new_service(
"local://localhost",
binary_dir=binary_dir,
library_dir=library_dir,
version=major_version,
version=version,
)
network.join_node(new_node, args.package, args)
network.trust_node(new_node, args)
@ -115,8 +111,8 @@ def run_code_upgrade_from(
args,
from_install_path,
to_install_path,
from_major_version=None,
to_major_version=None,
from_version=None,
to_version=None,
):
from_binary_dir, from_library_dir = get_bin_and_lib_dirs_for_install_path(
from_install_path
@ -137,7 +133,7 @@ def run_code_upgrade_from(
pdb=args.pdb,
txs=txs,
jwt_issuer=jwt_issuer,
version=from_major_version,
version=from_version,
) as network:
network.start_and_join(args)
@ -165,7 +161,7 @@ def run_code_upgrade_from(
"local://localhost",
binary_dir=to_binary_dir,
library_dir=to_library_dir,
version=to_major_version,
version=to_version,
)
network.join_node(
new_node, args.package, args, from_snapshot=from_snapshot
@ -177,7 +173,7 @@ def run_code_upgrade_from(
# Verify that all nodes run the expected CCF version
for node in network.get_joined_nodes():
# Note: /node/version endpoint was added in 2.x
if not node.version or node.version > 1:
if not node.major_version or node.major_version > 1:
with node.client() as c:
r = c.get("/node/version")
expected_version = node.version or args.ccf_version
@ -212,7 +208,7 @@ def run_code_upgrade_from(
jwt_issuer.refresh_keys()
# Note: /gov/jwt_keys/all endpoint was added in 2.x
primary, _ = network.find_nodes()
if primary.version and primary.version > 1:
if not primary.major_version or primary.major_version > 1:
jwt_issuer.wait_for_refresh(network)
else:
time.sleep(3)
@ -239,7 +235,7 @@ def run_code_upgrade_from(
to_install_path,
to_binary_dir,
to_library_dir,
to_major_version,
to_version,
)
# Check that the ledger can be parsed
@ -264,8 +260,8 @@ def run_live_compatibility_with_latest(args, repo, local_branch):
args,
from_install_path=lts_install_path,
to_install_path=LOCAL_CHECKOUT_DIRECTORY,
from_major_version=Version(lts_version).release[0],
to_major_version=local_major_version,
from_version=lts_version,
to_version=local_major_version,
)
return lts_version
@ -290,8 +286,8 @@ def run_live_compatibility_with_following(args, repo, local_branch):
args,
from_install_path=LOCAL_CHECKOUT_DIRECTORY,
to_install_path=lts_install_path,
from_major_version=local_major_version,
to_major_version=Version(lts_version).release[0],
from_version=local_major_version,
to_version=lts_version,
)
return lts_version
@ -325,14 +321,11 @@ def run_ledger_compatibility_since_first(args, local_branch, use_snapshot):
if lts_release:
version, install_path = repo.install_release(lts_release)
lts_versions.append(version)
major_version = Version(version).release[0]
set_js_args(args, install_path)
else:
version = args.ccf_version
install_path = LOCAL_CHECKOUT_DIRECTORY
major_version = infra.github.get_major_version_from_branch_name(
local_branch
)
binary_dir, library_dir = get_bin_and_lib_dirs_for_install_path(
install_path
)
@ -344,7 +337,7 @@ def run_ledger_compatibility_since_first(args, local_branch, use_snapshot):
"library_dir": library_dir,
"txs": txs,
"jwt_issuer": jwt_issuer,
"version": major_version,
"version": version,
}
if idx == 0:
LOG.info(f"Starting new service (version: {version})")
@ -369,7 +362,7 @@ def run_ledger_compatibility_since_first(args, local_branch, use_snapshot):
# Verify that all nodes run the expected CCF version
for node in nodes:
# Note: /node/version endpoint was added in 2.x
if not node.version or node.version > 1:
if not node.major_version or node.major_version > 1:
with node.client() as c:
r = c.get("/node/version")
expected_version = node.version or args.ccf_version
@ -383,7 +376,7 @@ def run_ledger_compatibility_since_first(args, local_branch, use_snapshot):
jwt_issuer.refresh_keys()
# Note: /gov/jwt_keys/all endpoint was added in 2.x
primary, _ = network.find_nodes()
if primary.version and primary.version > 1:
if not primary.major_version or primary.major_version > 1:
jwt_issuer.wait_for_refresh(network)
else:
time.sleep(3)
@ -394,7 +387,7 @@ def run_ledger_compatibility_since_first(args, local_branch, use_snapshot):
install_path,
binary_dir,
library_dir,
major_version,
version,
)
snapshot_dir = (

View file

@ -252,14 +252,14 @@ def test_node_replacement(network, args):
# Add in a node using the same address
replacement_node = network.create_node(
f"local://{node_to_replace.host}:{node_to_replace.rpc_port}",
f"local://{node_to_replace.rpc_host}:{node_to_replace.rpc_port}",
node_port=node_to_replace.node_port,
)
network.join_node(replacement_node, args.package, args, from_snapshot=False)
network.trust_node(replacement_node, args)
assert replacement_node.node_id != node_to_replace.node_id
assert replacement_node.host == node_to_replace.host
assert replacement_node.rpc_host == node_to_replace.rpc_host
assert replacement_node.node_port == node_to_replace.node_port
assert replacement_node.rpc_port == node_to_replace.rpc_port
LOG.info(

View file

@ -10,4 +10,5 @@ docutils
python-iptables
py-spy
GitPython
better_exceptions
docker
better_exceptions