save notebook cumreward plots to disk (#145)

* notebook cumreward plot to disk

---------

Co-authored-by: William Blum <william.blum@microsoft.com>
William Blum 2024-08-07 13:53:42 -07:00 committed by GitHub
Parent 09622b869c
Commit 4eabac5e60
No key found matching this signature
GPG key ID: B5690EEEBB952194
11 changed files: 168 additions and 163 deletions

View file

@@ -96,12 +96,14 @@ def plot_all_episodes(r):
plt.show()
def plot_averaged_cummulative_rewards(title, all_runs, show=True):
def plot_averaged_cummulative_rewards(title, all_runs, show=True, save_at=None):
"""Plot averaged cumulative rewards"""
new_plot(title)
for r in all_runs:
plot_episodes_rewards_averaged(r)
plt.legend(loc="lower right")
if save_at:
plt.savefig(save_at)
if show:
plt.show()
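
For context, a minimal usage sketch of the updated helper (the plots_dir value and the empty all_runs list are illustrative placeholders, not part of this commit; in the notebooks below, all_runs holds results returned by learner.epsilon_greedy_search):

    import os
    import cyberbattle.agents.baseline.plotting as p

    plots_dir = "output/plots"  # hypothetical output directory
    os.makedirs(plots_dir, exist_ok=True)

    all_runs = []  # placeholder: normally a list of training-run results
    p.plot_averaged_cummulative_rewards(
        title="Agent benchmark",
        all_runs=all_runs,
        show=False,  # skip the interactive window in headless/papermill runs
        save_at=os.path.join(plots_dir, "cumrewards.png"),  # new in this change
    )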

View file

@@ -72,6 +72,7 @@
"outputs": [],
"source": [
"import sys\n",
"import os\n",
"import logging\n",
"import gymnasium as gym\n",
"import cyberbattle.agents.baseline.learner as learner\n",
@@ -81,6 +82,7 @@
"import cyberbattle.agents.baseline.agent_tabularqlearning as tqa\n",
"import cyberbattle.agents.baseline.agent_dql as dqla\n",
"from cyberbattle.agents.baseline.agent_wrapper import Verbosity\n",
"from cyberbattle._env.cyberbattle_env import CyberBattleEnv\n",
"\n",
"logging.basicConfig(stream=sys.stdout, level=logging.ERROR, format=\"%(levelname)s: %(message)s\")\n",
"%matplotlib inline"
@@ -111,59 +113,14 @@
"outputs": [],
"source": [
"# Papermill notebook parameters\n",
"\n",
"#############\n",
"# gymid = 'CyberBattleTiny-v0'\n",
"#############\n",
"gymid = \"CyberBattleToyCtf-v0\"\n",
"env_size = None\n",
"iteration_count = 1500\n",
"training_episode_count = 20\n",
"eval_episode_count = 10\n",
"maximum_node_count = 12\n",
"maximum_total_credentials = 10\n",
"#############\n",
"# gymid = \"CyberBattleChain-v0\"\n",
"# env_size = 10\n",
"# iteration_count = 9000\n",
"# training_episode_count = 50\n",
"# eval_episode_count = 5\n",
"# maximum_node_count = 22\n",
"# maximum_total_credentials = 22"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "encouraging-shoot",
"metadata": {
"execution": {
"iopub.execute_input": "2024-08-04T03:01:55.636085Z",
"iopub.status.busy": "2024-08-04T03:01:55.635325Z",
"iopub.status.idle": "2024-08-04T03:01:55.641049Z",
"shell.execute_reply": "2024-08-04T03:01:55.640123Z"
},
"papermill": {
"duration": 0.011052,
"end_time": "2024-08-04T03:01:55.642618",
"exception": false,
"start_time": "2024-08-04T03:01:55.631566",
"status": "completed"
},
"tags": [
"injected-parameters"
]
},
"outputs": [],
"source": [
"# Parameters\n",
"gymid = \"CyberBattleChain-v0\"\n",
"iteration_count = 9000\n",
"training_episode_count = 50\n",
"eval_episode_count = 5\n",
"maximum_node_count = 22\n",
"maximum_total_credentials = 22\n",
"env_size = 10"
"env_size = 10\n",
"plots_dir = \"plots\"\n"
]
},
{
@@ -188,7 +145,7 @@
},
"outputs": [],
"source": [
"from cyberbattle._env.cyberbattle_env import CyberBattleEnv\n",
"os.makedirs(plots_dir, exist_ok=True)\n",
"\n",
"# Load the Gym environment\n",
"if env_size:\n",
@@ -144988,6 +144945,7 @@
" f\"State: {[f.name() for f in themodel.state_space.feature_selection]} \"\n",
" f\"({len(themodel.state_space.feature_selection)}\\n\"\n",
" f\"Action: abstract_action ({themodel.action_space.flat_size()})\",\n",
" save_at=os.path.join(plots_dir, \"benchmark-chain-cumrewards.png\"),\n",
")"
]
},
@@ -145037,7 +144995,8 @@
"source": [
"contenders = [credlookup_run, tabularq_run, dql_run, dql_exploit_run]\n",
"p.plot_episodes_length(contenders)\n",
"p.plot_averaged_cummulative_rewards(title=f\"Agent Benchmark top contenders\\n\" f\"max_nodes:{ep.maximum_node_count}\\n\", all_runs=contenders)"
"p.plot_averaged_cummulative_rewards(title=f\"Agent Benchmark top contenders\\n\" f\"max_nodes:{ep.maximum_node_count}\\n\", all_runs=contenders,\n",
" save_at=os.path.join(plots_dir, \"benchmark-chain-cumreward_contenders.png\"))"
]
},
{
@@ -145154,4 +145113,4 @@
},
"nbformat": 4,
"nbformat_minor": 5
}
}

View file

@@ -71,7 +71,6 @@
"import cyberbattle.agents.baseline.agent_dql as dqla\n",
"from cyberbattle.agents.baseline.agent_wrapper import Verbosity\n",
"import os\n",
"import matplotlib.pyplot as plt\n",
"\n",
"logging.basicConfig(stream=sys.stdout, level=logging.ERROR, format=\"%(levelname)s: %(message)s\")\n",
"%matplotlib inline"
@@ -470,6 +469,7 @@
" f\"State: {[f.name() for f in themodel.state_space.feature_selection]} \"\n",
" f\"({len(themodel.state_space.feature_selection)}\\n\"\n",
" f\"Action: abstract_action ({themodel.action_space.flat_size()})\",\n",
" save_at=os.path.join(plots_dir, \"benchmark-tiny-cumrewards.png\"),\n",
")"
]
},
@@ -498,10 +498,8 @@
"source": [
"contenders = [credlookup_run, tabularq_run, dql_run, dql_exploit_run]\n",
"p.plot_episodes_length(contenders)\n",
"p.plot_averaged_cummulative_rewards(title=f\"Agent Benchmark top contenders\\n\" f\"max_nodes:{ep.maximum_node_count}\\n\", all_runs=contenders, show=False)\n",
"\n",
"plt.savefig(os.path.join(plots_dir, \"benchmark-tiny-finalplot.png\"))\n",
"plt.show()"
"p.plot_averaged_cummulative_rewards(title=f\"Agent Benchmark top contenders\\n\" f\"max_nodes:{ep.maximum_node_count}\\n\", all_runs=contenders,\n",
" save_at=os.path.join(plots_dir, \"benchmark-tiny-cumreward_contenders.png\"))"
]
},
{
@@ -576,4 +574,4 @@
},
"nbformat": 4,
"nbformat_minor": 5
}
}

View file

@@ -62,6 +62,7 @@
"outputs": [],
"source": [
"import sys\n",
"import os\n",
"import logging\n",
"import gymnasium as gym\n",
"import cyberbattle.agents.baseline.learner as learner\n",
@@ -125,10 +126,6 @@
"outputs": [],
"source": [
"# Papermill notebook parameters\n",
"\n",
"#############\n",
"# gymid = 'CyberBattleTiny-v0'\n",
"#############\n",
"gymid = \"CyberBattleToyCtf-v0\"\n",
"env_size = None\n",
"iteration_count = 1500\n",
@@ -136,14 +133,7 @@
"eval_episode_count = 10\n",
"maximum_node_count = 12\n",
"maximum_total_credentials = 10\n",
"#############\n",
"# gymid = \"CyberBattleChain-v0\"\n",
"# env_size = 10\n",
"# iteration_count = 9000\n",
"# training_episode_count = 50\n",
"# eval_episode_count = 5\n",
"# maximum_node_count = 22\n",
"# maximum_total_credentials = 22"
"plots_dir = \"output/plots\"\n"
]
},
{
@@ -176,7 +166,8 @@
"training_episode_count = 20\n",
"eval_episode_count = 10\n",
"maximum_node_count = 12\n",
"maximum_total_credentials = 10"
"maximum_total_credentials = 10\n",
"plots_dir = \"output/plots\""
]
},
{
@@ -201,6 +192,8 @@
},
"outputs": [],
"source": [
"os.makedirs(plots_dir, exist_ok=True)\n",
"\n",
"# Load the Gym environment\n",
"if env_size:\n",
" _gym_env = gym.make(gymid, size=env_size)\n",
@@ -192540,6 +192533,8 @@
" f\"State: {[f.name() for f in themodel.state_space.feature_selection]} \"\n",
" f\"({len(themodel.state_space.feature_selection)}\\n\"\n",
" f\"Action: abstract_action ({themodel.action_space.flat_size()})\",\n",
" save_at=os.path.join(plots_dir, \"benchmark-toyctf-cumrewards.png\"),\n",
"\n",
")"
]
},
@@ -192589,7 +192584,8 @@
"source": [
"contenders = [credlookup_run, tabularq_run, dql_run, dql_exploit_run]\n",
"p.plot_episodes_length(contenders)\n",
"p.plot_averaged_cummulative_rewards(title=f\"Agent Benchmark top contenders\\n\" f\"max_nodes:{ep.maximum_node_count}\\n\", all_runs=contenders)"
"p.plot_averaged_cummulative_rewards(title=f\"Agent Benchmark top contenders\\n\" f\"max_nodes:{ep.maximum_node_count}\\n\", all_runs=contenders,\n",
" save_at=os.path.join(plots_dir, \"benchmark-toyctf-cumrewards_contenders.png\"))"
]
},
{
@@ -192705,4 +192701,4 @@
},
"nbformat": 4,
"nbformat_minor": 5
}
}

View file

@@ -226,7 +226,18 @@
"source": [
"iteration_count = 9000\n",
"training_episode_count = 50\n",
"eval_episode_count = 10"
"eval_episode_count = 10\n",
"plots_dir = \"output/images\""
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "65e34f4d",
"metadata": {},
"outputs": [],
"source": [
"os.makedirs(plots_dir, exist_ok=True)"
]
},
{
@@ -43536,7 +43547,7 @@
" iteration_count=iteration_count,\n",
" epsilon=0.0, # 0.35,\n",
" render=False,\n",
" render_last_episode_rewards_to=\"images/chain10\",\n",
" render_last_episode_rewards_to=os.path.join(plots_dir, \"dql_transfer-chain10\"),\n",
" title=\"Exploiting DQL\",\n",
" verbosity=Verbosity.Quiet,\n",
")"

View file

@@ -68,12 +68,13 @@
},
"outputs": [],
"source": [
"from cyberbattle._env.cyberbattle_env import AttackerGoal\n",
"from cyberbattle.agents.baseline.agent_randomcredlookup import CredentialCacheExploiter\n",
"import cyberbattle.agents.baseline.learner as learner\n",
"import os\n",
"import gymnasium as gym\n",
"import logging\n",
"import sys\n",
"from cyberbattle._env.cyberbattle_env import AttackerGoal\n",
"from cyberbattle.agents.baseline.agent_randomcredlookup import CredentialCacheExploiter\n",
"import cyberbattle.agents.baseline.learner as learner\n",
"import cyberbattle.agents.baseline.plotting as p\n",
"import cyberbattle.agents.baseline.agent_wrapper as w\n",
"from cyberbattle.agents.baseline.agent_wrapper import Verbosity"
@@ -194,7 +195,8 @@
"source": [
"iteration_count = 9000\n",
"training_episode_count = 50\n",
"eval_episode_count = 5"
"eval_episode_count = 5\n",
"plots_dir = 'plots'"
]
},
{
@@ -59089,6 +59091,8 @@
}
],
"source": [
"os.makedirs(plots_dir, exist_ok=True)\n",
"\n",
"credexplot = learner.epsilon_greedy_search(\n",
" cyberbattlechain_10,\n",
" learner=CredentialCacheExploiter(),\n",
@@ -63805,7 +63809,8 @@
"p.plot_all_episodes(credexplot)\n",
"\n",
"all_runs = [credexplot, randomlearning_results]\n",
"p.plot_averaged_cummulative_rewards(title=f\"Benchmark -- max_nodes={ep.maximum_node_count}, episodes={eval_episode_count},\\n\", all_runs=all_runs)"
"p.plot_averaged_cummulative_rewards(title=f\"Benchmark -- max_nodes={ep.maximum_node_count}, episodes={eval_episode_count},\\n\", all_runs=all_runs,\n",
" save_at=os.path.join(plots_dir, \"randlookups-cumreward.png\"))"
]
},
{
@@ -63862,4 +63867,4 @@
},
"nbformat": 4,
"nbformat_minor": 5
}
}

View file

@@ -69,11 +69,12 @@
"outputs": [],
"source": [
"import sys\n",
"import os\n",
"import logging\n",
"from typing import cast\n",
"import gymnasium as gym\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt # type:ignore\n",
"import matplotlib.pyplot as plt\n",
"from cyberbattle.agents.baseline.learner import TrainedLearner\n",
"import cyberbattle.agents.baseline.plotting as p\n",
"import cyberbattle.agents.baseline.agent_wrapper as w\n",
@@ -172,7 +173,8 @@
"eval_episode_count = 5\n",
"gamma_sweep = [\n",
" 0.015, # about right\n",
"]"
"]\n",
"plots_dir = 'output/plots'"
]
},
{
@@ -181,6 +183,16 @@
"id": "0cdf621d",
"metadata": {},
"outputs": [],
"source": [
"os.makedirs(plots_dir, exist_ok=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "004c0ad8",
"metadata": {},
"outputs": [],
"source": [
"def qlearning_run(gamma, gym_env):\n",
" \"\"\"Execute one run of the q-learning algorithm for the\n",
@@ -38410,6 +38422,7 @@
" f\"Q1={[f.name() for f in Q_source_10.state_space.feature_selection]} \"\n",
" f\"-> {[f.name() for f in Q_source_10.action_space.feature_selection]})\\n\"\n",
" f\"Q2={[f.name() for f in Q_attack_10.state_space.feature_selection]} -> 'action'\",\n",
" save_at=os.path.join(plots_dir, \"benchmark-tabularq-cumrewards.png\")\n",
")"
]
},
@@ -72401,9 +72414,9 @@
"cell_metadata_filter": "title,-all"
},
"kernelspec": {
"display_name": "Python [conda env:cybersim]",
"display_name": "cybersim",
"language": "python",
"name": "conda-env-cybersim-py"
"name": "python3"
},
"language_info": {
"codemirror_mode": {
@@ -72432,4 +72445,4 @@
},
"nbformat": 4,
"nbformat_minor": 5
}
}

View file

@@ -68,10 +68,10 @@
"outputs": [],
"source": [
"import sys\n",
"import os\n",
"import logging\n",
"import gymnasium as gym\n",
"import importlib\n",
"\n",
"import cyberbattle.agents.baseline.learner as learner\n",
"import cyberbattle.agents.baseline.plotting as p\n",
"import cyberbattle.agents.baseline.agent_wrapper as w\n",
@@ -18833,61 +18833,6 @@
")"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "d5ec9a83-bd2b-4039-8601-b1ae8355b1fd",
"metadata": {
"execution": {
"iopub.execute_input": "2024-08-05T19:09:46.981034Z",
"iopub.status.busy": "2024-08-05T19:09:46.980424Z",
"iopub.status.idle": "2024-08-05T19:09:47.030110Z",
"shell.execute_reply": "2024-08-05T19:09:47.028888Z"
},
"papermill": {
"duration": 0.155751,
"end_time": "2024-08-05T19:09:47.033105",
"exception": false,
"start_time": "2024-08-05T19:09:46.877354",
"status": "completed"
},
"tags": []
},
"outputs": [],
"source": [
"import matplotlib\n",
"\n",
"# Plots\n",
"all_runs = [credlookup_run, dqn_with_defender, dql_exploit_run]\n",
"p.plot_averaged_cummulative_rewards(all_runs=all_runs, title=f\"Attacker agents vs Basic Defender -- rewards\\n env={cyberbattlechain_defender.name}, episodes={training_episode_count}\", show=False)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "664255bf-d85e-4579-b388-8bb43fe0e813",
"metadata": {
"execution": {
"iopub.execute_input": "2024-08-05T19:09:47.319563Z",
"iopub.status.busy": "2024-08-05T19:09:47.318510Z",
"iopub.status.idle": "2024-08-05T19:09:47.364630Z",
"shell.execute_reply": "2024-08-05T19:09:47.362718Z"
},
"papermill": {
"duration": 0.182194,
"end_time": "2024-08-05T19:09:47.367834",
"exception": false,
"start_time": "2024-08-05T19:09:47.185640",
"status": "completed"
},
"tags": []
},
"outputs": [],
"source": [
"# p.plot_episodes_length(all_runs)\n",
"p.plot_averaged_availability(title=f\"Attacker agents vs Basic Defender -- availability\\n env={cyberbattlechain_defender.name}, episodes={training_episode_count}\", all_runs=all_runs, show=False)"
]
},
{
"cell_type": "code",
"execution_count": 9,
@@ -18931,11 +18876,11 @@
}
],
"source": [
"import os\n",
"\n",
"os.makedirs(plots_dir, exist_ok=True)\n",
"matplotlib.pyplot.savefig(os.path.join(plots_dir, \"withdefender-finalplot.png\"))\n",
"matplotlib.pyplot.show()"
"# Plots\n",
"all_runs = [credlookup_run, dqn_with_defender, dql_exploit_run]\n",
"p.plot_averaged_cummulative_rewards(all_runs=all_runs, title=f\"Attacker agents vs Basic Defender -- rewards\\n env={cyberbattlechain_defender.name}, episodes={training_episode_count}\", save_at=os.path.join(plots_dir, \"withdefender-cumreward.png\"))\n",
"# p.plot_episodes_length(all_runs)\n",
"p.plot_averaged_availability(title=f\"Attacker agents vs Basic Defender -- availability\\n env={cyberbattlechain_defender.name}, episodes={training_episode_count}\", all_runs=all_runs, show=False)"
]
}
],
@@ -18975,4 +18920,4 @@
},
"nbformat": 4,
"nbformat_minor": 5
}
}

notebooks/publish_benchmarks.sh (new executable file, 27 lines)
View file

@@ -0,0 +1,27 @@
# Push the latest benchmark run to git under the tag 'latest_benchmark'
# and a daily tag with the date
set -ex
THIS_DIR=$(dirname "$0")
BENCHMARK_DIR=$THIS_DIR/benchmarks
cp -r $THIS_DIR/output/benchmark $BENCHMARK_DIR
git add $BENCHMARK_DIR
# commit only if there are changes
if [ -z "$(git status --porcelain)" ]; then
echo "No changes to commit"
else
git commit -m "latest benchmark"
fi
# push the changes to git under tag 'latest_benchmark'
git tag -f latest_benchmark
git push -f origin latest_benchmark
# create a daily tag with the date
tagname=benchmark-$(date +%Y-%m-%d)
git tag -f $tagname
git push -f origin $tagname
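
Design note: both tags are force-updated (git tag -f followed by git push -f), so latest_benchmark is a moving pointer to the most recent benchmark run, while the dated benchmark-YYYY-MM-DD tags retain at most one snapshot per day.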

View file

@@ -1,4 +1,4 @@
# Run all the Jupyter notebooks and write the output to disk
# Run all the Jupyter notebooks in quick test mode (small number of iterations and episodes) and write the output to disk
set -ex
@@ -11,14 +11,18 @@ script_dir=$(dirname "$0")
pushd "$script_dir/.."
output_dir=notebooks/output/quick
output_plot_dir=$output_dir/plots
run () {
base=$1
papermill --kernel $kernel notebooks/$base.ipynb notebooks/output/$base.ipynb "${@:2}"
papermill --kernel $kernel notebooks/$base.ipynb $output_dir/$base.ipynb "${@:2}"
}
jupyter kernelspec list
mkdir notebooks/output -p
mkdir $output_dir -p
mkdir $output_plot_dir -p
# run c2_interactive_interface # disabled: not deterministic and can fail
@@ -30,12 +34,20 @@ run toyctf-random
run toyctf-solved
run chainnetwork-optionwrapper
run chainnetwork-random -y "
iterations: 100
"
run randomnetwork
run notebook_benchmark-toyctf -y "
iteration_count: 100
training_episode_count: 3
eval_episode_count: 5
maximum_node_count: 12
maximum_total_credentials: 10
plots_dir: $output_plot_dir
"
run notebook_benchmark-chain -y "
@@ -44,6 +56,7 @@ run notebook_benchmark-chain -y "
eval_episode_count: 3
maximum_node_count: 12
maximum_total_credentials: 7
plots_dir: $output_plot_dir
"
run notebook_benchmark-tiny -y "
@@ -52,39 +65,34 @@ run notebook_benchmark-tiny -y "
eval_episode_count: 2
maximum_node_count: 5
maximum_total_credentials: 3
plots_dir: notebooks/output/plots
plots_dir: $output_plot_dir
"
run notebook_dql_transfer -y "
iteration_count: 500
training_episode_count: 5
eval_episode_count: 3
plots_dir: $output_plot_dir
"
run chainnetwork-optionwrapper
run chainnetwork-random -y "
iterations: 100
"
run randomnetwork
run notebook_randlookups -y "
iteration_count: 500
training_episode_count: 5
eval_episode_count: 2
plots_dir: $output_plot_dir
"""
run notebook_tabularq -y "
iteration_count: 200
training_episode_count: 5
eval_episode_count: 2
iteration_count: 200
training_episode_count: 5
eval_episode_count: 2
plots_dir: $output_plot_dir
"
run notebook_withdefender -y "
iteration_count: 100
training_episode_count: 3
plots_dir: notebooks/output/plots
plots_dir: $output_plot_dir
"
run dql_active_directory -y "
@@ -92,4 +100,5 @@ run dql_active_directory -y "
iteration_count: 50
"
popd
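
For reference, a single run invocation above is equivalent to the following call to papermill's Python API (a sketch: the cybersim kernel is assumed to be installed, and the parameter values echo the quick-test settings shown above):

    import papermill as pm

    # Equivalent of: run notebook_benchmark-chain -y "<yaml parameters>"
    pm.execute_notebook(
        "notebooks/notebook_benchmark-chain.ipynb",  # input notebook
        "notebooks/output/quick/notebook_benchmark-chain.ipynb",  # executed copy written to disk
        kernel_name="cybersim",
        parameters=dict(
            eval_episode_count=3,
            maximum_node_count=12,
            maximum_total_credentials=7,
            plots_dir="notebooks/output/quick/plots",
        ),
    )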

notebooks/run_benchmark.sh (new executable file, 40 lines)
View file

@@ -0,0 +1,40 @@
# Run benchmarking notebooks
set -ex
kernel=$1
if [ -z "$kernel" ]; then
kernel=cybersim
fi
script_dir=$(dirname "$0")
pushd "$script_dir/.."
output_dir=notebooks/output/benchmark
output_plot_dir=$output_dir/plots
run () {
base=$1
papermill --kernel $kernel notebooks/$base.ipynb $output_dir/$base.ipynb "${@:2}"
}
jupyter kernelspec list
mkdir $output_dir -p
mkdir $output_plot_dir -p
run notebook_benchmark-chain -y "
gymid: CyberBattleChain-v0
iteration_count: 2000
training_episode_count: 20
eval_episode_count: 3
maximum_node_count: 20
maximum_total_credentials: 20
env_size: 14
plots_dir: $output_plot_dir
"
popd
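
Usage note: the script takes an optional Jupyter kernel name as its first argument (./notebooks/run_benchmark.sh [kernel]) and falls back to the cybersim kernel when none is given; the executed notebook and its cumulative-reward plots are written under notebooks/output/benchmark.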