save notebook cumreward plots to disk (#145)

* notebook cumreward plot to disk

---------

Co-authored-by: William Blum <william.blum@microsoft.com>

Parent: 09622b869c
Commit: 4eabac5e60
@@ -96,12 +96,14 @@ def plot_all_episodes(r):
     plt.show()


-def plot_averaged_cummulative_rewards(title, all_runs, show=True):
+def plot_averaged_cummulative_rewards(title, all_runs, show=True, save_at=None):
     """Plot averaged cumulative rewards"""
     new_plot(title)
     for r in all_runs:
         plot_episodes_rewards_averaged(r)
     plt.legend(loc="lower right")
+    if save_at:
+        plt.savefig(save_at)
     if show:
         plt.show()
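With the optional `save_at` argument, a notebook can write the averaged cumulative-reward figure to disk in the same call that renders it. A minimal usage sketch, assuming a list of runs produced earlier in a notebook (the run list and file name here are illustrative, not from the commit):

    import os
    import cyberbattle.agents.baseline.plotting as p

    plots_dir = "plots"  # illustrative output directory
    os.makedirs(plots_dir, exist_ok=True)

    # Saves the figure when save_at is given, and still displays it
    # unless show=False is also passed.
    p.plot_averaged_cummulative_rewards(
        title="Agent Benchmark",
        all_runs=all_runs,  # assumed: runs computed by earlier cells
        save_at=os.path.join(plots_dir, "cumrewards.png"),
    )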

@@ -72,6 +72,7 @@
 "outputs": [],
 "source": [
 "import sys\n",
+ "import os\n",
 "import logging\n",
 "import gymnasium as gym\n",
 "import cyberbattle.agents.baseline.learner as learner\n",
@@ -81,6 +82,7 @@
 "import cyberbattle.agents.baseline.agent_tabularqlearning as tqa\n",
 "import cyberbattle.agents.baseline.agent_dql as dqla\n",
 "from cyberbattle.agents.baseline.agent_wrapper import Verbosity\n",
+ "from cyberbattle._env.cyberbattle_env import CyberBattleEnv\n",
 "\n",
 "logging.basicConfig(stream=sys.stdout, level=logging.ERROR, format=\"%(levelname)s: %(message)s\")\n",
 "%matplotlib inline"
@@ -111,59 +113,14 @@
 "outputs": [],
 "source": [
 "# Papermill notebook parameters\n",
 "\n",
 "#############\n",
 "# gymid = 'CyberBattleTiny-v0'\n",
 "#############\n",
 "gymid = \"CyberBattleToyCtf-v0\"\n",
 "env_size = None\n",
 "iteration_count = 1500\n",
 "training_episode_count = 20\n",
 "eval_episode_count = 10\n",
 "maximum_node_count = 12\n",
 "maximum_total_credentials = 10\n",
 "#############\n",
 "# gymid = \"CyberBattleChain-v0\"\n",
 "# env_size = 10\n",
 "# iteration_count = 9000\n",
 "# training_episode_count = 50\n",
 "# eval_episode_count = 5\n",
 "# maximum_node_count = 22\n",
 "# maximum_total_credentials = 22"
 ]
 },
 {
 "cell_type": "code",
 "execution_count": 4,
 "id": "encouraging-shoot",
 "metadata": {
 "execution": {
 "iopub.execute_input": "2024-08-04T03:01:55.636085Z",
 "iopub.status.busy": "2024-08-04T03:01:55.635325Z",
 "iopub.status.idle": "2024-08-04T03:01:55.641049Z",
 "shell.execute_reply": "2024-08-04T03:01:55.640123Z"
 },
 "papermill": {
 "duration": 0.011052,
 "end_time": "2024-08-04T03:01:55.642618",
 "exception": false,
 "start_time": "2024-08-04T03:01:55.631566",
 "status": "completed"
 },
 "tags": [
 "injected-parameters"
 ]
 },
 "outputs": [],
 "source": [
 "# Parameters\n",
 "gymid = \"CyberBattleChain-v0\"\n",
 "iteration_count = 9000\n",
 "training_episode_count = 50\n",
 "eval_episode_count = 5\n",
 "maximum_node_count = 22\n",
 "maximum_total_credentials = 22\n",
- "env_size = 10"
+ "env_size = 10\n",
+ "plots_dir = \"plots\"\n"
 ]
 },
 {
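The `injected-parameters` cell above is papermill output: when a notebook is executed with parameter overrides, papermill inserts such a tagged cell after the cell tagged `parameters`, which is why the committed notebook carries both the defaults and the injected values. A hedged sketch of driving this from Python (the notebook paths are illustrative; the parameter values are taken from the hunk above):

    import papermill as pm

    # Re-execute the benchmark notebook with overridden parameters;
    # papermill writes an "injected-parameters" cell into the output notebook.
    pm.execute_notebook(
        "notebooks/notebook_benchmark-chain.ipynb",          # illustrative input path
        "notebooks/output/notebook_benchmark-chain.ipynb",   # illustrative output path
        parameters=dict(
            gymid="CyberBattleChain-v0",
            iteration_count=9000,
            env_size=10,
            plots_dir="plots",  # picked up by the new save-to-disk calls
        ),
    )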
@@ -188,7 +145,7 @@
 },
 "outputs": [],
 "source": [
- "from cyberbattle._env.cyberbattle_env import CyberBattleEnv\n",
+ "os.makedirs(plots_dir, exist_ok=True)\n",
 "\n",
 "# Load the Gym environment\n",
 "if env_size:\n",
@@ -144988,6 +144945,7 @@
 " f\"State: {[f.name() for f in themodel.state_space.feature_selection]} \"\n",
 " f\"({len(themodel.state_space.feature_selection)}\\n\"\n",
 " f\"Action: abstract_action ({themodel.action_space.flat_size()})\",\n",
+ " save_at=os.path.join(plots_dir, \"benchmark-chain-cumrewards.png\"),\n",
 ")"
 ]
 },
@@ -145037,7 +144995,8 @@
 "source": [
 "contenders = [credlookup_run, tabularq_run, dql_run, dql_exploit_run]\n",
 "p.plot_episodes_length(contenders)\n",
- "p.plot_averaged_cummulative_rewards(title=f\"Agent Benchmark top contenders\\n\" f\"max_nodes:{ep.maximum_node_count}\\n\", all_runs=contenders)"
+ "p.plot_averaged_cummulative_rewards(title=f\"Agent Benchmark top contenders\\n\" f\"max_nodes:{ep.maximum_node_count}\\n\", all_runs=contenders,\n",
+ "                                    save_at=os.path.join(plots_dir, \"benchmark-chain-cumreward_contenders.png\"))"
 ]
 },
 {
@@ -145154,4 +145113,4 @@
 },
 "nbformat": 4,
 "nbformat_minor": 5
-}
+}

@@ -71,7 +71,6 @@
 "import cyberbattle.agents.baseline.agent_dql as dqla\n",
 "from cyberbattle.agents.baseline.agent_wrapper import Verbosity\n",
 "import os\n",
- "import matplotlib.pyplot as plt\n",
 "\n",
 "logging.basicConfig(stream=sys.stdout, level=logging.ERROR, format=\"%(levelname)s: %(message)s\")\n",
 "%matplotlib inline"
@@ -470,6 +469,7 @@
 " f\"State: {[f.name() for f in themodel.state_space.feature_selection]} \"\n",
 " f\"({len(themodel.state_space.feature_selection)}\\n\"\n",
 " f\"Action: abstract_action ({themodel.action_space.flat_size()})\",\n",
+ " save_at=os.path.join(plots_dir, \"benchmark-tiny-cumrewards.png\"),\n",
 ")"
 ]
 },
@@ -498,10 +498,8 @@
 "source": [
 "contenders = [credlookup_run, tabularq_run, dql_run, dql_exploit_run]\n",
 "p.plot_episodes_length(contenders)\n",
- "p.plot_averaged_cummulative_rewards(title=f\"Agent Benchmark top contenders\\n\" f\"max_nodes:{ep.maximum_node_count}\\n\", all_runs=contenders, show=False)\n",
- "\n",
- "plt.savefig(os.path.join(plots_dir, \"benchmark-tiny-finalplot.png\"))\n",
- "plt.show()"
+ "p.plot_averaged_cummulative_rewards(title=f\"Agent Benchmark top contenders\\n\" f\"max_nodes:{ep.maximum_node_count}\\n\", all_runs=contenders,\n",
+ "                                    save_at=os.path.join(plots_dir, \"benchmark-tiny-cumreward_contenders.png\"))"
 ]
 },
 {
@@ -576,4 +574,4 @@
 },
 "nbformat": 4,
 "nbformat_minor": 5
-}
+}
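This hunk shows the motivation for `save_at`: the tiny-benchmark notebook previously had to suppress display, save the implicit matplotlib figure by hand, and then show it. A side-by-side sketch of the two styles, assuming `contenders` and `plots_dir` are defined as in the notebook:

    import os
    import matplotlib.pyplot as plt
    import cyberbattle.agents.baseline.plotting as p

    # Old style: three steps around the implicit current figure.
    p.plot_averaged_cummulative_rewards(title="...", all_runs=contenders, show=False)
    plt.savefig(os.path.join(plots_dir, "benchmark-tiny-finalplot.png"))
    plt.show()

    # New style: one call that both saves and shows.
    p.plot_averaged_cummulative_rewards(
        title="...",
        all_runs=contenders,
        save_at=os.path.join(plots_dir, "benchmark-tiny-cumreward_contenders.png"),
    )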

@@ -62,6 +62,7 @@
 "outputs": [],
 "source": [
 "import sys\n",
+ "import os\n",
 "import logging\n",
 "import gymnasium as gym\n",
 "import cyberbattle.agents.baseline.learner as learner\n",
@@ -125,10 +126,6 @@
 "outputs": [],
 "source": [
 "# Papermill notebook parameters\n",
- "\n",
- "#############\n",
- "# gymid = 'CyberBattleTiny-v0'\n",
- "#############\n",
 "gymid = \"CyberBattleToyCtf-v0\"\n",
 "env_size = None\n",
 "iteration_count = 1500\n",
@@ -136,14 +133,7 @@
 "eval_episode_count = 10\n",
 "maximum_node_count = 12\n",
 "maximum_total_credentials = 10\n",
- "#############\n",
- "# gymid = \"CyberBattleChain-v0\"\n",
- "# env_size = 10\n",
- "# iteration_count = 9000\n",
- "# training_episode_count = 50\n",
- "# eval_episode_count = 5\n",
- "# maximum_node_count = 22\n",
- "# maximum_total_credentials = 22"
+ "plots_dir = \"output/plots\"\n"
 ]
 },
 {
@@ -176,7 +166,8 @@
 "training_episode_count = 20\n",
 "eval_episode_count = 10\n",
 "maximum_node_count = 12\n",
- "maximum_total_credentials = 10"
+ "maximum_total_credentials = 10\n",
+ "plots_dir = \"output/plots\""
 ]
 },
 {
@@ -201,6 +192,8 @@
 },
 "outputs": [],
 "source": [
+ "os.makedirs(plots_dir, exist_ok=True)\n",
+ "\n",
 "# Load the Gym environment\n",
 "if env_size:\n",
 " _gym_env = gym.make(gymid, size=env_size)\n",
@@ -192540,6 +192533,8 @@
 " f\"State: {[f.name() for f in themodel.state_space.feature_selection]} \"\n",
 " f\"({len(themodel.state_space.feature_selection)}\\n\"\n",
 " f\"Action: abstract_action ({themodel.action_space.flat_size()})\",\n",
+ " save_at=os.path.join(plots_dir, \"benchmark-toyctf-cumrewards.png\"),\n",
+ "\n",
 ")"
 ]
 },
@@ -192589,7 +192584,8 @@
 "source": [
 "contenders = [credlookup_run, tabularq_run, dql_run, dql_exploit_run]\n",
 "p.plot_episodes_length(contenders)\n",
- "p.plot_averaged_cummulative_rewards(title=f\"Agent Benchmark top contenders\\n\" f\"max_nodes:{ep.maximum_node_count}\\n\", all_runs=contenders)"
+ "p.plot_averaged_cummulative_rewards(title=f\"Agent Benchmark top contenders\\n\" f\"max_nodes:{ep.maximum_node_count}\\n\", all_runs=contenders,\n",
+ "                                    save_at=os.path.join(plots_dir, \"benchmark-toyctf-cumrewards_contenders.png\"))"
 ]
 },
 {
@@ -192705,4 +192701,4 @@
 },
 "nbformat": 4,
 "nbformat_minor": 5
-}
+}
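Each notebook now creates its plot directory in the same cell that loads the environment, so a papermill-overridden `plots_dir` takes effect before any figure is written. A sketch of the pattern (the `else` branch is an assumption based on the truncated hunk; `gymid`, `env_size`, and `plots_dir` come from the parameters cell):

    import os
    import gymnasium as gym

    os.makedirs(plots_dir, exist_ok=True)  # ensure the save target exists

    # Load the Gym environment; sized environments (e.g. chain) take an
    # explicit size argument.
    if env_size:
        _gym_env = gym.make(gymid, size=env_size)
    else:
        _gym_env = gym.make(gymid)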

@@ -226,7 +226,18 @@
 "source": [
 "iteration_count = 9000\n",
 "training_episode_count = 50\n",
- "eval_episode_count = 10"
+ "eval_episode_count = 10\n",
+ "plots_dir = \"output/images\""
 ]
 },
+{
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "65e34f4d",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "os.makedirs(plots_dir, exist_ok=True)"
+ ]
+},
 {
@@ -43536,7 +43547,7 @@
 " iteration_count=iteration_count,\n",
 " epsilon=0.0, # 0.35,\n",
 " render=False,\n",
- " render_last_episode_rewards_to=\"images/chain10\",\n",
+ " render_last_episode_rewards_to=os.path.join(plots_dir, \"dql_transfer-chain10\"),\n",
 " title=\"Exploiting DQL\",\n",
 " verbosity=Verbosity.Quiet,\n",
 ")"
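The `render_last_episode_rewards_to` prefix moves from a hard-coded `images/` folder to the parameterized `plots_dir`. A sketch of the surrounding call; the argument names outside the hunk are assumptions modeled on the notebook's other cells, not confirmed by this diff:

    import os
    import cyberbattle.agents.baseline.learner as learner
    from cyberbattle.agents.baseline.agent_wrapper import Verbosity

    dql_exploit_run = learner.epsilon_greedy_search(
        cyberbattlechain_10,            # assumed: environment defined earlier
        environment_properties=ep,      # assumed
        learner=trained_learner,        # assumed: policy from the training run
        episode_count=eval_episode_count,
        iteration_count=iteration_count,
        epsilon=0.0,  # 0.35,
        render=False,
        render_last_episode_rewards_to=os.path.join(plots_dir, "dql_transfer-chain10"),
        title="Exploiting DQL",
        verbosity=Verbosity.Quiet,
    )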

@@ -68,12 +68,13 @@
 },
 "outputs": [],
 "source": [
- "from cyberbattle._env.cyberbattle_env import AttackerGoal\n",
- "from cyberbattle.agents.baseline.agent_randomcredlookup import CredentialCacheExploiter\n",
- "import cyberbattle.agents.baseline.learner as learner\n",
+ "import os\n",
 "import gymnasium as gym\n",
 "import logging\n",
 "import sys\n",
+ "from cyberbattle._env.cyberbattle_env import AttackerGoal\n",
+ "from cyberbattle.agents.baseline.agent_randomcredlookup import CredentialCacheExploiter\n",
+ "import cyberbattle.agents.baseline.learner as learner\n",
 "import cyberbattle.agents.baseline.plotting as p\n",
 "import cyberbattle.agents.baseline.agent_wrapper as w\n",
 "from cyberbattle.agents.baseline.agent_wrapper import Verbosity"
@@ -194,7 +195,8 @@
 "source": [
 "iteration_count = 9000\n",
 "training_episode_count = 50\n",
- "eval_episode_count = 5"
+ "eval_episode_count = 5\n",
+ "plots_dir = 'plots'"
 ]
 },
 {
@@ -59089,6 +59091,8 @@
 }
 ],
 "source": [
+ "os.makedirs(plots_dir, exist_ok=True)\n",
+ "\n",
 "credexplot = learner.epsilon_greedy_search(\n",
 " cyberbattlechain_10,\n",
 " learner=CredentialCacheExploiter(),\n",
@@ -63805,7 +63809,8 @@
 "p.plot_all_episodes(credexplot)\n",
 "\n",
 "all_runs = [credexplot, randomlearning_results]\n",
- "p.plot_averaged_cummulative_rewards(title=f\"Benchmark -- max_nodes={ep.maximum_node_count}, episodes={eval_episode_count},\\n\", all_runs=all_runs)"
+ "p.plot_averaged_cummulative_rewards(title=f\"Benchmark -- max_nodes={ep.maximum_node_count}, episodes={eval_episode_count},\\n\", all_runs=all_runs,\n",
+ "                                    save_at=os.path.join(plots_dir, \"randlookups-cumreward.png\"))"
 ]
 },
 {
@@ -63862,4 +63867,4 @@
 },
 "nbformat": 4,
 "nbformat_minor": 5
-}
+}
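Taken together, the randlookups changes follow the same recipe as the other notebooks: create the directory, plot, save. Condensed into one block (the run objects come from earlier cells of the notebook):

    import os
    import cyberbattle.agents.baseline.plotting as p

    os.makedirs(plots_dir, exist_ok=True)

    # Per-episode detail for the credential-cache exploiter run...
    p.plot_all_episodes(credexplot)

    # ...and the averaged comparison against the random baseline, saved to disk.
    all_runs = [credexplot, randomlearning_results]
    p.plot_averaged_cummulative_rewards(
        title=f"Benchmark -- max_nodes={ep.maximum_node_count}, episodes={eval_episode_count},\n",
        all_runs=all_runs,
        save_at=os.path.join(plots_dir, "randlookups-cumreward.png"),
    )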

@@ -69,11 +69,12 @@
 "outputs": [],
 "source": [
 "import sys\n",
+ "import os\n",
 "import logging\n",
 "from typing import cast\n",
 "import gymnasium as gym\n",
 "import numpy as np\n",
- "import matplotlib.pyplot as plt  # type:ignore\n",
+ "import matplotlib.pyplot as plt\n",
 "from cyberbattle.agents.baseline.learner import TrainedLearner\n",
 "import cyberbattle.agents.baseline.plotting as p\n",
 "import cyberbattle.agents.baseline.agent_wrapper as w\n",
@@ -172,7 +173,8 @@
 "eval_episode_count = 5\n",
 "gamma_sweep = [\n",
 " 0.015, # about right\n",
- "]"
+ "]\n",
+ "plots_dir = 'output/plots'"
 ]
 },
 {
@@ -181,6 +183,16 @@
 "id": "0cdf621d",
 "metadata": {},
 "outputs": [],
 "source": [
+ "os.makedirs(plots_dir, exist_ok=True)"
+ ]
+},
+{
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "004c0ad8",
+ "metadata": {},
+ "outputs": [],
+ "source": [
 "def qlearning_run(gamma, gym_env):\n",
 " \"\"\"Execute one run of the q-learning algorithm for the\n",
@@ -38410,6 +38422,7 @@
 " f\"Q1={[f.name() for f in Q_source_10.state_space.feature_selection]} \"\n",
 " f\"-> {[f.name() for f in Q_source_10.action_space.feature_selection]})\\n\"\n",
 " f\"Q2={[f.name() for f in Q_attack_10.state_space.feature_selection]} -> 'action'\",\n",
+ " save_at=os.path.join(plots_dir, \"benchmark-tabularq-cumrewards.png\")\n",
 ")"
 ]
 },
@@ -72401,9 +72414,9 @@
 "cell_metadata_filter": "title,-all"
 },
 "kernelspec": {
- "display_name": "Python [conda env:cybersim]",
+ "display_name": "cybersim",
 "language": "python",
- "name": "conda-env-cybersim-py"
+ "name": "python3"
 },
 "language_info": {
 "codemirror_mode": {
@@ -72432,4 +72445,4 @@
 },
 "nbformat": 4,
 "nbformat_minor": 5
-}
+}

@@ -68,10 +68,10 @@
 "outputs": [],
 "source": [
 "import sys\n",
+ "import os\n",
 "import logging\n",
 "import gymnasium as gym\n",
- "import importlib\n",
 "\n",
 "import cyberbattle.agents.baseline.learner as learner\n",
 "import cyberbattle.agents.baseline.plotting as p\n",
 "import cyberbattle.agents.baseline.agent_wrapper as w\n",
@@ -18833,61 +18833,6 @@
 ")"
 ]
 },
-{
- "cell_type": "code",
- "execution_count": 7,
- "id": "d5ec9a83-bd2b-4039-8601-b1ae8355b1fd",
- "metadata": {
- "execution": {
- "iopub.execute_input": "2024-08-05T19:09:46.981034Z",
- "iopub.status.busy": "2024-08-05T19:09:46.980424Z",
- "iopub.status.idle": "2024-08-05T19:09:47.030110Z",
- "shell.execute_reply": "2024-08-05T19:09:47.028888Z"
- },
- "papermill": {
- "duration": 0.155751,
- "end_time": "2024-08-05T19:09:47.033105",
- "exception": false,
- "start_time": "2024-08-05T19:09:46.877354",
- "status": "completed"
- },
- "tags": []
- },
- "outputs": [],
- "source": [
- "import matplotlib\n",
- "\n",
- "# Plots\n",
- "all_runs = [credlookup_run, dqn_with_defender, dql_exploit_run]\n",
- "p.plot_averaged_cummulative_rewards(all_runs=all_runs, title=f\"Attacker agents vs Basic Defender -- rewards\\n env={cyberbattlechain_defender.name}, episodes={training_episode_count}\", show=False)"
- ]
- },
-{
- "cell_type": "code",
- "execution_count": 8,
- "id": "664255bf-d85e-4579-b388-8bb43fe0e813",
- "metadata": {
- "execution": {
- "iopub.execute_input": "2024-08-05T19:09:47.319563Z",
- "iopub.status.busy": "2024-08-05T19:09:47.318510Z",
- "iopub.status.idle": "2024-08-05T19:09:47.364630Z",
- "shell.execute_reply": "2024-08-05T19:09:47.362718Z"
- },
- "papermill": {
- "duration": 0.182194,
- "end_time": "2024-08-05T19:09:47.367834",
- "exception": false,
- "start_time": "2024-08-05T19:09:47.185640",
- "status": "completed"
- },
- "tags": []
- },
- "outputs": [],
- "source": [
- "# p.plot_episodes_length(all_runs)\n",
- "p.plot_averaged_availability(title=f\"Attacker agents vs Basic Defender -- availability\\n env={cyberbattlechain_defender.name}, episodes={training_episode_count}\", all_runs=all_runs, show=False)"
- ]
- },
 {
 "cell_type": "code",
 "execution_count": 9,
@@ -18931,11 +18876,11 @@
 }
 ],
 "source": [
- "import os\n",
- "\n",
- "os.makedirs(plots_dir, exist_ok=True)\n",
- "matplotlib.pyplot.savefig(os.path.join(plots_dir, \"withdefender-finalplot.png\"))\n",
- "matplotlib.pyplot.show()"
+ "# Plots\n",
+ "all_runs = [credlookup_run, dqn_with_defender, dql_exploit_run]\n",
+ "p.plot_averaged_cummulative_rewards(all_runs=all_runs, title=f\"Attacker agents vs Basic Defender -- rewards\\n env={cyberbattlechain_defender.name}, episodes={training_episode_count}\", save_at=os.path.join(plots_dir, \"withdefender-cumreward.png\"))\n",
+ "# p.plot_episodes_length(all_runs)\n",
+ "p.plot_averaged_availability(title=f\"Attacker agents vs Basic Defender -- availability\\n env={cyberbattlechain_defender.name}, episodes={training_episode_count}\", all_runs=all_runs, show=False)"
 ]
 }
 ],
@@ -18975,4 +18920,4 @@
 },
 "nbformat": 4,
 "nbformat_minor": 5
-}
+}

@@ -0,0 +1,27 @@
+# Push the latest benchmark run to git under the tag 'latest_benchmark'
+# and a daily tag with the date
+set -ex
+
+THIS_DIR=$(dirname "$0")
+
+BENCHMARK_DIR=$THIS_DIR/benchmarks
+
+cp -r $THIS_DIR/output/benchmark $BENCHMARK_DIR
+
+git add $BENCHMARK_DIR
+
+# commit only if there are changes
+if [ -z "$(git status --porcelain)" ]; then
+    echo "No changes to commit"
+else
+    git commit -m "latest benchmark"
+fi
+
+# push the changes to git under tag 'latest_benchmark'
+git tag -f latest_benchmark
+git push -f origin latest_benchmark
+
+# create a daily tag with the date
+tagname=benchmark-$(date +%Y-%m-%d)
+git tag -f $tagname
+git push -f origin $tagname

@@ -1,4 +1,4 @@
-# Run all the Jupyter notebooks and write the output to disk
+# Run all the Jupyter notebooks in quick test mode (small number of iterations and episodes) and write the output to disk

 set -ex

@@ -11,14 +11,18 @@ script_dir=$(dirname "$0")
 pushd "$script_dir/.."

+output_dir=notebooks/output/quick
+output_plot_dir=$output_dir/plots
+
 run () {
     base=$1
-    papermill --kernel $kernel notebooks/$base.ipynb notebooks/output/$base.ipynb "${@:2}"
+    papermill --kernel $kernel notebooks/$base.ipynb $output_dir/$base.ipynb "${@:2}"
 }

 jupyter kernelspec list

-mkdir notebooks/output -p
+mkdir $output_dir -p
+mkdir $output_plot_dir -p

 # run c2_interactive_interface # disabled: not deterministic and can fail

@@ -30,12 +34,20 @@ run toyctf-random
 run toyctf-solved

+run chainnetwork-optionwrapper
+
+run chainnetwork-random -y "
+iterations: 100
+"
+run randomnetwork
+
 run notebook_benchmark-toyctf -y "
 iteration_count: 100
 training_episode_count: 3
 eval_episode_count: 5
 maximum_node_count: 12
 maximum_total_credentials: 10
+plots_dir: $output_plot_dir
 "

 run notebook_benchmark-chain -y "
@@ -44,6 +56,7 @@ run notebook_benchmark-chain -y "
 eval_episode_count: 3
 maximum_node_count: 12
 maximum_total_credentials: 7
+plots_dir: $output_plot_dir
 "

 run notebook_benchmark-tiny -y "
@@ -52,39 +65,34 @@ run notebook_benchmark-tiny -y "
 eval_episode_count: 2
 maximum_node_count: 5
 maximum_total_credentials: 3
-plots_dir: notebooks/output/plots
+plots_dir: $output_plot_dir
 "

 run notebook_dql_transfer -y "
 iteration_count: 500
 training_episode_count: 5
 eval_episode_count: 3
+plots_dir: $output_plot_dir
 "

-run chainnetwork-optionwrapper
-
-run chainnetwork-random -y "
-iterations: 100
-"
-
-run randomnetwork
-
 run notebook_randlookups -y "
 iteration_count: 500
 training_episode_count: 5
 eval_episode_count: 2
+plots_dir: $output_plot_dir
 "

 run notebook_tabularq -y "
-iteration_count: 200
-training_episode_count: 5
-eval_episode_count: 2
 iteration_count: 200
 training_episode_count: 5
 eval_episode_count: 2
+plots_dir: $output_plot_dir
 "

 run notebook_withdefender -y "
 iteration_count: 100
 training_episode_count: 3
-plots_dir: notebooks/output/plots
+plots_dir: $output_plot_dir
 "

 run dql_active_directory -y "
@@ -92,4 +100,5 @@ run dql_active_directory -y "
 iteration_count: 50
 "

+
 popd

@@ -0,0 +1,40 @@
+# Run benchmarking notebooks
+
+set -ex
+
+kernel=$1
+if [ -z "$kernel" ]; then
+    kernel=cybersim
+fi
+
+script_dir=$(dirname "$0")
+
+pushd "$script_dir/.."
+
+output_dir=notebooks/output/benchmark
+output_plot_dir=$output_dir/plots
+
+
+run () {
+    base=$1
+    papermill --kernel $kernel notebooks/$base.ipynb $output_dir/$base.ipynb "${@:2}"
+}
+
+jupyter kernelspec list
+
+mkdir $output_dir -p
+mkdir $output_plot_dir -p
+
+run notebook_benchmark-chain -y "
+gymid: "CyberBattleChain-v0"
+iteration_count: 2000
+training_episode_count: 20
+eval_episode_count: 3
+maximum_node_count: 20
+maximum_total_credentials: 20
+env_size: 14
+plots_dir: $output_plot_dir
+"
+
+
+popd