CCF/tests/recovery_benchmark.sh

140 строки
4.2 KiB
Bash
Executable File

#!/bin/bash
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the Apache 2.0 License.
# This script measures how long the recovery procedure takes.
# Note that the script makes use of the sandbox and, as such,
# the timing results are rough (+/- a few seconds).
# Usage: $ cd CCF/build && ../tests/recovery_benchmark.sh /opt/ccf [--with-snapshot] [--sig-tx-interval 100] [--load-run-time-s 30]
# --- Command-line parsing ----------------------------------------------------
# $1 (required) : CCF install path; sandbox.sh is run from its bin/ directory.
# Optional flags, in any order after $1:
#   --with-snapshot        pass the copied snapshots directory to the recovery
#   --sig-tx-interval N    signature interval handed to the original service
#   --load-run-time-s N    number of seconds the load generator runs
# Parsing stops silently at the first argument that is not one of these flags.
if [ -z "$1" ]; then
  echo "Error: First argument should be CCF install path"
  exit 1
fi
ccf_install_path=$1
shift

with_snapshot=false
signature_tx_interval=10000 # CCF default
load_run_time_s=20

while [ "$#" -gt 0 ]; do
  case "$1" in
    --with-snapshot)
      with_snapshot=true
      ;;
    --sig-tx-interval)
      # Reject a missing value instead of silently storing an empty string
      # that would later be forwarded to sandbox.sh.
      if [ -z "$2" ]; then
        echo "Error: --sig-tx-interval requires a value"
        exit 1
      fi
      signature_tx_interval="$2"
      shift
      ;;
    --load-run-time-s)
      if [ -z "$2" ]; then
        echo "Error: --load-run-time-s requires a value"
        exit 1
      fi
      load_run_time_s="$2"
      shift
      ;;
    *)
      break
      ;;
  esac
  shift
done

# From here on, abort on the first unhandled command failure.
set -e
# Print the HTTP status code returned by the service's /commit endpoint.
# The response body is discarded; TLS is verified against the sandbox's
# service certificate.
function service_http_status() {
  local endpoint="https://127.0.0.1:8000/commit"
  local ca_cert="./workspace/sandbox_common/service_cert.pem"

  curl -o /dev/null -s "${endpoint}" -w "%{http_code}" --cacert "${ca_cert}"
}
# Print the numeric value of the second dot-separated field of the
# "transaction_id" string reported by the node's /node/commit endpoint.
function current_ledger_length() {
  local commit_url="https://127.0.0.1:8000/node/commit"
  local ca_cert="./workspace/sandbox_common/service_cert.pem"

  curl -s "${commit_url}" --cacert "${ca_cert}" \
    | jq '.transaction_id | split(".")[1] | tonumber'
}
# Poll the service once per second until it answers HTTP 200.
#
# Arguments:
#   $1 - maximum number of one-second polls before giving up
#   $2 - PID of the sandbox process; polling stops early if it disappears
# Outputs:
#   progress messages on stdout
# Returns (NOTE: inverted with respect to the usual shell convention, callers
# use `if poll_for_service_open ...; then <handle failure>`):
#   1 - the service is open (HTTP 200 was observed)
#   0 - the service did NOT open: poll budget exhausted or sandbox process gone
function poll_for_service_open()
{
  # Locals so that the caller's globals of the same purpose are never clobbered.
  local max_polls=$1
  local watched_pid=$2
  local polls=0

  while [ "$(service_http_status)" != "200" ]; do
    if [ "${polls}" -ge "${max_polls}" ]; then
      return 0
    fi
    echo "Waiting for service to open..."
    polls=$((polls + 1))
    sleep 1
    if ! ps -p "${watched_pid}" > /dev/null; then
      echo "Sandbox process has terminated"
      return 0
    fi
  done

  return 1
}
# Terminate every background job still tracked by this shell.
# Registered as the EXIT handler below, so it runs on every exit path.
function cleanup() {
  local job_pids
  job_pids=$(jobs -p)

  # Nothing to do when no background job is left (avoids `kill ""`).
  if [ -z "${job_pids}" ]; then
    return 0
  fi

  # `jobs -p` prints one PID per line: the expansion is deliberately unquoted
  # so that each PID becomes its own argument to kill.
  # Errors are ignored on purpose: a listed job may already have exited.
  # shellcheck disable=SC2086
  kill ${job_pids} 2> /dev/null || true
}
trap cleanup EXIT
# --- Benchmark flow ----------------------------------------------------------
# 1. start a sandbox service, 2. load it with locust, 3. stop it and copy its
# ledger/snapshots, 4. start a recovery sandbox from the copy and time how long
# it takes to open, 5. check that no ledger entries were lost.

# Succeeds only for a non-empty string made of decimal digits.
function is_non_negative_integer() {
  case "$1" in
    '' | *[!0-9]*) return 1 ;;
    *) return 0 ;;
  esac
}

# Make PYTHON_PACKAGE_PATH absolute (symlinks are not expanded) when it is set.
if [ -n "$PYTHON_PACKAGE_PATH" ]; then
  PYTHON_PACKAGE_PATH=$(realpath -s "${PYTHON_PACKAGE_PATH}")
fi

echo "** Start original service"
"${ccf_install_path}"/bin/sandbox.sh --sig-tx-interval "${signature_tx_interval}" &
sandbox_pid=$!

network_live_time=60
# poll_for_service_open returns 0 (shell "true") when the service did NOT open.
if poll_for_service_open "${network_live_time}" "${sandbox_pid}"; then
  echo "Error: Timeout waiting ${network_live_time}s for service to open"
  # Background jobs are terminated by the EXIT trap; no explicit kill here,
  # which under `set -e` would abort early if the sandbox had already exited.
  exit 1
fi

echo "** Load service"
python3.8 -m venv .recovery_bench_env
# shellcheck disable=SC1091
source .recovery_bench_env/bin/activate
python -m pip -q install locust
locust --headless \
  --locustfile ../tests/infra/locust_file.py \
  --ca ./workspace/sandbox_common/service_cert.pem \
  --key ./workspace/sandbox_common/user0_privk.pem \
  --cert ./workspace/sandbox_common/user0_cert.pem \
  --spawn-rate 100 \
  --users 100 \
  --rate 1000000 \
  --node-host https://127.0.0.1:8000 \
  --host https://0.0.0.0 \
  --run-time "${load_run_time_s}s"

entries_to_recover=$(current_ledger_length)
# Without this check an empty value would make the later `-lt` comparison
# error out inside `if`, silently skipping the verification.
if ! is_non_negative_integer "${entries_to_recover}"; then
  echo "Error: could not read ledger length of original service (got '${entries_to_recover}')"
  exit 1
fi

echo "** Stop service"
# NOTE(review): the script does not wait for the process to exit before
# copying its files - confirm sandbox.sh shuts down promptly on SIGTERM.
kill "${sandbox_pid}"

echo "** Copy data from defunct service"
LEDGER_DIR="0.ledger/"
SNAPSHOTS_DIR="0.snapshots/"
rm -rf "${LEDGER_DIR}" "${SNAPSHOTS_DIR}"
cp -R "./workspace/sandbox_0/${LEDGER_DIR}" .
cp -R "./workspace/sandbox_0/${SNAPSHOTS_DIR}" .

# Extra sandbox.sh arguments, only populated when --with-snapshot was given.
recovery_snapshot_dir_args=()
if [ "${with_snapshot}" = true ]; then
  recovery_snapshot_dir_args=(--snapshots-dir "${SNAPSHOTS_DIR}")
fi

echo "** Recover service"
seconds_before_recovery=$SECONDS
"${ccf_install_path}"/bin/sandbox.sh --recover \
  --ledger-dir "${LEDGER_DIR}" \
  --common-dir ./workspace/sandbox_common \
  --ledger-recovery-timeout 1000 \
  "${recovery_snapshot_dir_args[@]}" &
sandbox_pid=$!

network_live_time=600
if poll_for_service_open "${network_live_time}" "${sandbox_pid}"; then
  echo "Error: Timeout waiting ${network_live_time}s for service to open"
  exit 1
fi
# Take the timing as soon as the service is open, before the verification query.
total_recovery_time=$((SECONDS - seconds_before_recovery))

entries_final=$(current_ledger_length)
if ! is_non_negative_integer "${entries_final}"; then
  echo "Error: could not read ledger length of recovered service (got '${entries_final}')"
  exit 1
fi
if [ "${entries_final}" -lt "${entries_to_recover}" ]; then
  echo "Error: not all entries were recovered (expected ${entries_to_recover} but only recovered ${entries_final})"
  exit 1
fi

echo "** Successfully recovered ${entries_final} entries"
echo "Total recovery time: $total_recovery_time secs [# entries: ${entries_to_recover}, with snapshot: ${with_snapshot}, sig interval: ${signature_tx_interval}]"