save notebook cumreward plots to disk (#145)

* notebook cumreward plot to disk

---------

Co-authored-by: William Blum <william.blum@microsoft.com>

Parent: 09622b869c
Commit: 4eabac5e60
@@ -96,12 +96,14 @@ def plot_all_episodes(r):
     plt.show()


-def plot_averaged_cummulative_rewards(title, all_runs, show=True):
+def plot_averaged_cummulative_rewards(title, all_runs, show=True, save_at=None):
     """Plot averaged cumulative rewards"""
     new_plot(title)
     for r in all_runs:
         plot_episodes_rewards_averaged(r)
     plt.legend(loc="lower right")
+    if save_at:
+        plt.savefig(save_at)
     if show:
         plt.show()
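With the optional `save_at` argument, a notebook can write the averaged cumulative-reward figure to disk in the same call that renders it. A minimal usage sketch, assuming a list of runs produced earlier in a notebook (the run list and file name here are illustrative, not from the commit):

    import os
    import cyberbattle.agents.baseline.plotting as p

    plots_dir = "plots"  # illustrative output directory
    os.makedirs(plots_dir, exist_ok=True)

    # Saves the figure when save_at is given, and still displays it
    # unless show=False is also passed.
    p.plot_averaged_cummulative_rewards(
        title="Agent Benchmark",
        all_runs=all_runs,  # assumed: runs computed by earlier cells
        save_at=os.path.join(plots_dir, "cumrewards.png"),
    )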

@@ -72,6 +72,7 @@
 "outputs": [],
 "source": [
 "import sys\n",
+ "import os\n",
 "import logging\n",
 "import gymnasium as gym\n",
 "import cyberbattle.agents.baseline.learner as learner\n",
@@ -81,6 +82,7 @@
 "import cyberbattle.agents.baseline.agent_tabularqlearning as tqa\n",
 "import cyberbattle.agents.baseline.agent_dql as dqla\n",
 "from cyberbattle.agents.baseline.agent_wrapper import Verbosity\n",
+ "from cyberbattle._env.cyberbattle_env import CyberBattleEnv\n",
 "\n",
 "logging.basicConfig(stream=sys.stdout, level=logging.ERROR, format=\"%(levelname)s: %(message)s\")\n",
 "%matplotlib inline"
@@ -111,59 +113,14 @@
 "outputs": [],
 "source": [
 "# Papermill notebook parameters\n",
 "\n",
 "#############\n",
 "# gymid = 'CyberBattleTiny-v0'\n",
 "#############\n",
 "gymid = \"CyberBattleToyCtf-v0\"\n",
 "env_size = None\n",
 "iteration_count = 1500\n",
 "training_episode_count = 20\n",
 "eval_episode_count = 10\n",
 "maximum_node_count = 12\n",
 "maximum_total_credentials = 10\n",
 "#############\n",
 "# gymid = \"CyberBattleChain-v0\"\n",
 "# env_size = 10\n",
 "# iteration_count = 9000\n",
 "# training_episode_count = 50\n",
 "# eval_episode_count = 5\n",
 "# maximum_node_count = 22\n",
 "# maximum_total_credentials = 22"
 ]
 },
 {
 "cell_type": "code",
 "execution_count": 4,
 "id": "encouraging-shoot",
 "metadata": {
 "execution": {
 "iopub.execute_input": "2024-08-04T03:01:55.636085Z",
 "iopub.status.busy": "2024-08-04T03:01:55.635325Z",
 "iopub.status.idle": "2024-08-04T03:01:55.641049Z",
 "shell.execute_reply": "2024-08-04T03:01:55.640123Z"
 },
 "papermill": {
 "duration": 0.011052,
 "end_time": "2024-08-04T03:01:55.642618",
 "exception": false,
 "start_time": "2024-08-04T03:01:55.631566",
 "status": "completed"
 },
 "tags": [
 "injected-parameters"
 ]
 },
 "outputs": [],
 "source": [
 "# Parameters\n",
 "gymid = \"CyberBattleChain-v0\"\n",
 "iteration_count = 9000\n",
 "training_episode_count = 50\n",
 "eval_episode_count = 5\n",
 "maximum_node_count = 22\n",
 "maximum_total_credentials = 22\n",
- "env_size = 10"
+ "env_size = 10\n",
+ "plots_dir = \"plots\"\n"
 ]
 },
 {
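The `injected-parameters` cell above is papermill output: when a notebook is executed with parameter overrides, papermill inserts such a tagged cell after the cell tagged `parameters`, which is why the committed notebook carries both the defaults and the injected values. A hedged sketch of driving this from Python (the notebook paths are illustrative; the parameter values are taken from the hunk above):

    import papermill as pm

    # Re-execute the benchmark notebook with overridden parameters;
    # papermill writes an "injected-parameters" cell into the output notebook.
    pm.execute_notebook(
        "notebooks/notebook_benchmark-chain.ipynb",          # illustrative input path
        "notebooks/output/notebook_benchmark-chain.ipynb",   # illustrative output path
        parameters=dict(
            gymid="CyberBattleChain-v0",
            iteration_count=9000,
            env_size=10,
            plots_dir="plots",  # picked up by the new save-to-disk calls
        ),
    )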
@@ -188,7 +145,7 @@
 },
 "outputs": [],
 "source": [
- "from cyberbattle._env.cyberbattle_env import CyberBattleEnv\n",
+ "os.makedirs(plots_dir, exist_ok=True)\n",
 "\n",
 "# Load the Gym environment\n",
 "if env_size:\n",
@@ -144988,6 +144945,7 @@
 " f\"State: {[f.name() for f in themodel.state_space.feature_selection]} \"\n",
 " f\"({len(themodel.state_space.feature_selection)}\\n\"\n",
 " f\"Action: abstract_action ({themodel.action_space.flat_size()})\",\n",
+ " save_at=os.path.join(plots_dir, \"benchmark-chain-cumrewards.png\"),\n",
 ")"
 ]
 },
@@ -145037,7 +144995,8 @@
 "source": [
 "contenders = [credlookup_run, tabularq_run, dql_run, dql_exploit_run]\n",
 "p.plot_episodes_length(contenders)\n",
- "p.plot_averaged_cummulative_rewards(title=f\"Agent Benchmark top contenders\\n\" f\"max_nodes:{ep.maximum_node_count}\\n\", all_runs=contenders)"
+ "p.plot_averaged_cummulative_rewards(title=f\"Agent Benchmark top contenders\\n\" f\"max_nodes:{ep.maximum_node_count}\\n\", all_runs=contenders,\n",
+ "                                    save_at=os.path.join(plots_dir, \"benchmark-chain-cumreward_contenders.png\"))"
 ]
 },
 {
@@ -145154,4 +145113,4 @@
 },
 "nbformat": 4,
 "nbformat_minor": 5
-}
+}

@@ -71,7 +71,6 @@
 "import cyberbattle.agents.baseline.agent_dql as dqla\n",
 "from cyberbattle.agents.baseline.agent_wrapper import Verbosity\n",
 "import os\n",
- "import matplotlib.pyplot as plt\n",
 "\n",
 "logging.basicConfig(stream=sys.stdout, level=logging.ERROR, format=\"%(levelname)s: %(message)s\")\n",
 "%matplotlib inline"
@@ -470,6 +469,7 @@
 " f\"State: {[f.name() for f in themodel.state_space.feature_selection]} \"\n",
 " f\"({len(themodel.state_space.feature_selection)}\\n\"\n",
 " f\"Action: abstract_action ({themodel.action_space.flat_size()})\",\n",
+ " save_at=os.path.join(plots_dir, \"benchmark-tiny-cumrewards.png\"),\n",
 ")"
 ]
 },
@@ -498,10 +498,8 @@
 "source": [
 "contenders = [credlookup_run, tabularq_run, dql_run, dql_exploit_run]\n",
 "p.plot_episodes_length(contenders)\n",
- "p.plot_averaged_cummulative_rewards(title=f\"Agent Benchmark top contenders\\n\" f\"max_nodes:{ep.maximum_node_count}\\n\", all_runs=contenders, show=False)\n",
- "\n",
- "plt.savefig(os.path.join(plots_dir, \"benchmark-tiny-finalplot.png\"))\n",
- "plt.show()"
+ "p.plot_averaged_cummulative_rewards(title=f\"Agent Benchmark top contenders\\n\" f\"max_nodes:{ep.maximum_node_count}\\n\", all_runs=contenders,\n",
+ "                                    save_at=os.path.join(plots_dir, \"benchmark-tiny-cumreward_contenders.png\"))"
 ]
 },
 {
@@ -576,4 +574,4 @@
 },
 "nbformat": 4,
 "nbformat_minor": 5
-}
+}
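This hunk shows the motivation for `save_at`: the tiny-benchmark notebook previously had to suppress display, save the implicit matplotlib figure by hand, and then show it. A side-by-side sketch of the two styles, assuming `contenders` and `plots_dir` are defined as in the notebook:

    import os
    import matplotlib.pyplot as plt
    import cyberbattle.agents.baseline.plotting as p

    # Old style: three steps around the implicit current figure.
    p.plot_averaged_cummulative_rewards(title="...", all_runs=contenders, show=False)
    plt.savefig(os.path.join(plots_dir, "benchmark-tiny-finalplot.png"))
    plt.show()

    # New style: one call that both saves and shows.
    p.plot_averaged_cummulative_rewards(
        title="...",
        all_runs=contenders,
        save_at=os.path.join(plots_dir, "benchmark-tiny-cumreward_contenders.png"),
    )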

@@ -62,6 +62,7 @@
 "outputs": [],
 "source": [
 "import sys\n",
+ "import os\n",
 "import logging\n",
 "import gymnasium as gym\n",
 "import cyberbattle.agents.baseline.learner as learner\n",
@@ -125,10 +126,6 @@
 "outputs": [],
 "source": [
 "# Papermill notebook parameters\n",
- "\n",
- "#############\n",
- "# gymid = 'CyberBattleTiny-v0'\n",
- "#############\n",
 "gymid = \"CyberBattleToyCtf-v0\"\n",
 "env_size = None\n",
 "iteration_count = 1500\n",
@@ -136,14 +133,7 @@
 "eval_episode_count = 10\n",
 "maximum_node_count = 12\n",
 "maximum_total_credentials = 10\n",
- "#############\n",
- "# gymid = \"CyberBattleChain-v0\"\n",
- "# env_size = 10\n",
- "# iteration_count = 9000\n",
- "# training_episode_count = 50\n",
- "# eval_episode_count = 5\n",
- "# maximum_node_count = 22\n",
- "# maximum_total_credentials = 22"
+ "plots_dir = \"output/plots\"\n"
 ]
 },
 {
@@ -176,7 +166,8 @@
 "training_episode_count = 20\n",
 "eval_episode_count = 10\n",
 "maximum_node_count = 12\n",
- "maximum_total_credentials = 10"
+ "maximum_total_credentials = 10\n",
+ "plots_dir = \"output/plots\""
 ]
 },
 {
@@ -201,6 +192,8 @@
 },
 "outputs": [],
 "source": [
+ "os.makedirs(plots_dir, exist_ok=True)\n",
+ "\n",
 "# Load the Gym environment\n",
 "if env_size:\n",
 " _gym_env = gym.make(gymid, size=env_size)\n",
@@ -192540,6 +192533,8 @@
 " f\"State: {[f.name() for f in themodel.state_space.feature_selection]} \"\n",
 " f\"({len(themodel.state_space.feature_selection)}\\n\"\n",
 " f\"Action: abstract_action ({themodel.action_space.flat_size()})\",\n",
+ " save_at=os.path.join(plots_dir, \"benchmark-toyctf-cumrewards.png\"),\n",
+ "\n",
 ")"
 ]
 },
@@ -192589,7 +192584,8 @@
 "source": [
 "contenders = [credlookup_run, tabularq_run, dql_run, dql_exploit_run]\n",
 "p.plot_episodes_length(contenders)\n",
- "p.plot_averaged_cummulative_rewards(title=f\"Agent Benchmark top contenders\\n\" f\"max_nodes:{ep.maximum_node_count}\\n\", all_runs=contenders)"
+ "p.plot_averaged_cummulative_rewards(title=f\"Agent Benchmark top contenders\\n\" f\"max_nodes:{ep.maximum_node_count}\\n\", all_runs=contenders,\n",
+ "                                    save_at=os.path.join(plots_dir, \"benchmark-toyctf-cumrewards_contenders.png\"))"
 ]
 },
 {
@@ -192705,4 +192701,4 @@
 },
 "nbformat": 4,
 "nbformat_minor": 5
-}
+}
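Each notebook now creates its plot directory in the same cell that loads the environment, so a papermill-overridden `plots_dir` takes effect before any figure is written. A sketch of the pattern (the `else` branch is an assumption based on the truncated hunk; `gymid`, `env_size`, and `plots_dir` come from the parameters cell):

    import os
    import gymnasium as gym

    os.makedirs(plots_dir, exist_ok=True)  # ensure the save target exists

    # Load the Gym environment; sized environments (e.g. chain) take an
    # explicit size argument.
    if env_size:
        _gym_env = gym.make(gymid, size=env_size)
    else:
        _gym_env = gym.make(gymid)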

@@ -226,7 +226,18 @@
 "source": [
 "iteration_count = 9000\n",
 "training_episode_count = 50\n",
- "eval_episode_count = 10"
+ "eval_episode_count = 10\n",
+ "plots_dir = \"output/images\""
 ]
 },
+{
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "65e34f4d",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "os.makedirs(plots_dir, exist_ok=True)"
+ ]
+},
 {
@@ -43536,7 +43547,7 @@
 " iteration_count=iteration_count,\n",
 " epsilon=0.0, # 0.35,\n",
 " render=False,\n",
- " render_last_episode_rewards_to=\"images/chain10\",\n",
+ " render_last_episode_rewards_to=os.path.join(plots_dir, \"dql_transfer-chain10\"),\n",
 " title=\"Exploiting DQL\",\n",
 " verbosity=Verbosity.Quiet,\n",
 ")"
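The `render_last_episode_rewards_to` prefix moves from a hard-coded `images/` folder to the parameterized `plots_dir`. A sketch of the surrounding call; the argument names outside the hunk are assumptions modeled on the notebook's other cells, not confirmed by this diff:

    import os
    import cyberbattle.agents.baseline.learner as learner
    from cyberbattle.agents.baseline.agent_wrapper import Verbosity

    dql_exploit_run = learner.epsilon_greedy_search(
        cyberbattlechain_10,            # assumed: environment defined earlier
        environment_properties=ep,      # assumed
        learner=trained_learner,        # assumed: policy from the training run
        episode_count=eval_episode_count,
        iteration_count=iteration_count,
        epsilon=0.0,  # 0.35,
        render=False,
        render_last_episode_rewards_to=os.path.join(plots_dir, "dql_transfer-chain10"),
        title="Exploiting DQL",
        verbosity=Verbosity.Quiet,
    )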

@@ -68,12 +68,13 @@
 },
 "outputs": [],
 "source": [
- "from cyberbattle._env.cyberbattle_env import AttackerGoal\n",
- "from cyberbattle.agents.baseline.agent_randomcredlookup import CredentialCacheExploiter\n",
- "import cyberbattle.agents.baseline.learner as learner\n",
+ "import os\n",
 "import gymnasium as gym\n",
 "import logging\n",
 "import sys\n",
+ "from cyberbattle._env.cyberbattle_env import AttackerGoal\n",
+ "from cyberbattle.agents.baseline.agent_randomcredlookup import CredentialCacheExploiter\n",
+ "import cyberbattle.agents.baseline.learner as learner\n",
 "import cyberbattle.agents.baseline.plotting as p\n",
 "import cyberbattle.agents.baseline.agent_wrapper as w\n",
 "from cyberbattle.agents.baseline.agent_wrapper import Verbosity"
@@ -194,7 +195,8 @@
 "source": [
 "iteration_count = 9000\n",
 "training_episode_count = 50\n",
- "eval_episode_count = 5"
+ "eval_episode_count = 5\n",
+ "plots_dir = 'plots'"
 ]
 },
 {
@@ -59089,6 +59091,8 @@
 }
 ],
 "source": [
+ "os.makedirs(plots_dir, exist_ok=True)\n",
+ "\n",
 "credexplot = learner.epsilon_greedy_search(\n",
 " cyberbattlechain_10,\n",
 " learner=CredentialCacheExploiter(),\n",
@@ -63805,7 +63809,8 @@
 "p.plot_all_episodes(credexplot)\n",
 "\n",
 "all_runs = [credexplot, randomlearning_results]\n",
- "p.plot_averaged_cummulative_rewards(title=f\"Benchmark -- max_nodes={ep.maximum_node_count}, episodes={eval_episode_count},\\n\", all_runs=all_runs)"
+ "p.plot_averaged_cummulative_rewards(title=f\"Benchmark -- max_nodes={ep.maximum_node_count}, episodes={eval_episode_count},\\n\", all_runs=all_runs,\n",
+ "                                    save_at=os.path.join(plots_dir, \"randlookups-cumreward.png\"))"
 ]
 },
 {
@@ -63862,4 +63867,4 @@
 },
 "nbformat": 4,
 "nbformat_minor": 5
-}
+}
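Taken together, the randlookups changes follow the same recipe as the other notebooks: create the directory, plot, save. Condensed into one block (the run objects come from earlier cells of the notebook):

    import os
    import cyberbattle.agents.baseline.plotting as p

    os.makedirs(plots_dir, exist_ok=True)

    # Per-episode detail for the credential-cache exploiter run...
    p.plot_all_episodes(credexplot)

    # ...and the averaged comparison against the random baseline, saved to disk.
    all_runs = [credexplot, randomlearning_results]
    p.plot_averaged_cummulative_rewards(
        title=f"Benchmark -- max_nodes={ep.maximum_node_count}, episodes={eval_episode_count},\n",
        all_runs=all_runs,
        save_at=os.path.join(plots_dir, "randlookups-cumreward.png"),
    )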

@@ -69,11 +69,12 @@
 "outputs": [],
 "source": [
 "import sys\n",
+ "import os\n",
 "import logging\n",
 "from typing import cast\n",
 "import gymnasium as gym\n",
 "import numpy as np\n",
- "import matplotlib.pyplot as plt  # type:ignore\n",
+ "import matplotlib.pyplot as plt\n",
 "from cyberbattle.agents.baseline.learner import TrainedLearner\n",
 "import cyberbattle.agents.baseline.plotting as p\n",
 "import cyberbattle.agents.baseline.agent_wrapper as w\n",
@@ -172,7 +173,8 @@
 "eval_episode_count = 5\n",
 "gamma_sweep = [\n",
 " 0.015, # about right\n",
- "]"
+ "]\n",
+ "plots_dir = 'output/plots'"
 ]
 },
 {
@@ -181,6 +183,16 @@
 "id": "0cdf621d",
 "metadata": {},
 "outputs": [],
 "source": [
+ "os.makedirs(plots_dir, exist_ok=True)"
+ ]
+},
+{
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "004c0ad8",
+ "metadata": {},
+ "outputs": [],
+ "source": [
 "def qlearning_run(gamma, gym_env):\n",
 " \"\"\"Execute one run of the q-learning algorithm for the\n",
@@ -38410,6 +38422,7 @@
 " f\"Q1={[f.name() for f in Q_source_10.state_space.feature_selection]} \"\n",
 " f\"-> {[f.name() for f in Q_source_10.action_space.feature_selection]})\\n\"\n",
 " f\"Q2={[f.name() for f in Q_attack_10.state_space.feature_selection]} -> 'action'\",\n",
+ " save_at=os.path.join(plots_dir, \"benchmark-tabularq-cumrewards.png\")\n",
 ")"
 ]
 },
@@ -72401,9 +72414,9 @@
 "cell_metadata_filter": "title,-all"
 },
 "kernelspec": {
- "display_name": "Python [conda env:cybersim]",
+ "display_name": "cybersim",
 "language": "python",
- "name": "conda-env-cybersim-py"
+ "name": "python3"
 },
 "language_info": {
 "codemirror_mode": {
@@ -72432,4 +72445,4 @@
 },
 "nbformat": 4,
 "nbformat_minor": 5
-}
+}

@@ -68,10 +68,10 @@
 "outputs": [],
 "source": [
 "import sys\n",
+ "import os\n",
 "import logging\n",
 "import gymnasium as gym\n",
- "import importlib\n",
 "\n",
 "import cyberbattle.agents.baseline.learner as learner\n",
 "import cyberbattle.agents.baseline.plotting as p\n",
 "import cyberbattle.agents.baseline.agent_wrapper as w\n",
@@ -18833,61 +18833,6 @@
 ")"
 ]
 },
-{
- "cell_type": "code",
- "execution_count": 7,
- "id": "d5ec9a83-bd2b-4039-8601-b1ae8355b1fd",
- "metadata": {
- "execution": {
- "iopub.execute_input": "2024-08-05T19:09:46.981034Z",
- "iopub.status.busy": "2024-08-05T19:09:46.980424Z",
- "iopub.status.idle": "2024-08-05T19:09:47.030110Z",
- "shell.execute_reply": "2024-08-05T19:09:47.028888Z"
- },
- "papermill": {
- "duration": 0.155751,
- "end_time": "2024-08-05T19:09:47.033105",
- "exception": false,
- "start_time": "2024-08-05T19:09:46.877354",
- "status": "completed"
- },
- "tags": []
- },
- "outputs": [],
- "source": [
- "import matplotlib\n",
- "\n",
- "# Plots\n",
- "all_runs = [credlookup_run, dqn_with_defender, dql_exploit_run]\n",
- "p.plot_averaged_cummulative_rewards(all_runs=all_runs, title=f\"Attacker agents vs Basic Defender -- rewards\\n env={cyberbattlechain_defender.name}, episodes={training_episode_count}\", show=False)"
- ]
- },
-{
- "cell_type": "code",
- "execution_count": 8,
- "id": "664255bf-d85e-4579-b388-8bb43fe0e813",
- "metadata": {
- "execution": {
- "iopub.execute_input": "2024-08-05T19:09:47.319563Z",
- "iopub.status.busy": "2024-08-05T19:09:47.318510Z",
- "iopub.status.idle": "2024-08-05T19:09:47.364630Z",
- "shell.execute_reply": "2024-08-05T19:09:47.362718Z"
- },
- "papermill": {
- "duration": 0.182194,
- "end_time": "2024-08-05T19:09:47.367834",
- "exception": false,
- "start_time": "2024-08-05T19:09:47.185640",
- "status": "completed"
- },
- "tags": []
- },
- "outputs": [],
- "source": [
- "# p.plot_episodes_length(all_runs)\n",
- "p.plot_averaged_availability(title=f\"Attacker agents vs Basic Defender -- availability\\n env={cyberbattlechain_defender.name}, episodes={training_episode_count}\", all_runs=all_runs, show=False)"
- ]
- },
 {
 "cell_type": "code",
 "execution_count": 9,
@@ -18931,11 +18876,11 @@
 }
 ],
 "source": [
- "import os\n",
- "\n",
- "os.makedirs(plots_dir, exist_ok=True)\n",
- "matplotlib.pyplot.savefig(os.path.join(plots_dir, \"withdefender-finalplot.png\"))\n",
- "matplotlib.pyplot.show()"
+ "# Plots\n",
+ "all_runs = [credlookup_run, dqn_with_defender, dql_exploit_run]\n",
+ "p.plot_averaged_cummulative_rewards(all_runs=all_runs, title=f\"Attacker agents vs Basic Defender -- rewards\\n env={cyberbattlechain_defender.name}, episodes={training_episode_count}\", save_at=os.path.join(plots_dir, \"withdefender-cumreward.png\"))\n",
+ "# p.plot_episodes_length(all_runs)\n",
+ "p.plot_averaged_availability(title=f\"Attacker agents vs Basic Defender -- availability\\n env={cyberbattlechain_defender.name}, episodes={training_episode_count}\", all_runs=all_runs, show=False)"
 ]
 }
 ],
@@ -18975,4 +18920,4 @@
 },
 "nbformat": 4,
 "nbformat_minor": 5
-}
+}

@@ -0,0 +1,27 @@
+# Push the latest benchmark run to git under the tag 'latest_benchmark'
+# and a daily tag with the date
+set -ex
+
+THIS_DIR=$(dirname "$0")
+
+BENCHMARK_DIR=$THIS_DIR/benchmarks
+
+cp -r $THIS_DIR/output/benchmark $BENCHMARK_DIR
+
+git add $BENCHMARK_DIR
+
+# commit only if there are changes
+if [ -z "$(git status --porcelain)" ]; then
+    echo "No changes to commit"
+else
+    git commit -m "latest benchmark"
+fi
+
+# push the changes to git under tag 'latest_benchmark'
+git tag -f latest_benchmark
+git push -f origin latest_benchmark
+
+# create a daily tag with the date
+tagname=benchmark-$(date +%Y-%m-%d)
+git tag -f $tagname
+git push -f origin $tagname

@@ -1,4 +1,4 @@
-# Run all the Jupyter notebooks and write the output to disk
+# Run all the Jupyter notebooks in quick test mode (small number of iterations and episodes) and write the output to disk

 set -ex

@@ -11,14 +11,18 @@ script_dir=$(dirname "$0")
 pushd "$script_dir/.."

+output_dir=notebooks/output/quick
+output_plot_dir=$output_dir/plots
+
 run () {
     base=$1
-    papermill --kernel $kernel notebooks/$base.ipynb notebooks/output/$base.ipynb "${@:2}"
+    papermill --kernel $kernel notebooks/$base.ipynb $output_dir/$base.ipynb "${@:2}"
 }

 jupyter kernelspec list

-mkdir notebooks/output -p
+mkdir $output_dir -p
+mkdir $output_plot_dir -p

 # run c2_interactive_interface # disabled: not deterministic and can fail

@@ -30,12 +34,20 @@ run toyctf-random
 run toyctf-solved

+run chainnetwork-optionwrapper
+
+run chainnetwork-random -y "
+iterations: 100
+"
+run randomnetwork
+
 run notebook_benchmark-toyctf -y "
 iteration_count: 100
 training_episode_count: 3
 eval_episode_count: 5
 maximum_node_count: 12
 maximum_total_credentials: 10
+plots_dir: $output_plot_dir
 "

 run notebook_benchmark-chain -y "
@@ -44,6 +56,7 @@ run notebook_benchmark-chain -y "
 eval_episode_count: 3
 maximum_node_count: 12
 maximum_total_credentials: 7
+plots_dir: $output_plot_dir
 "

 run notebook_benchmark-tiny -y "
@@ -52,39 +65,34 @@ run notebook_benchmark-tiny -y "
 eval_episode_count: 2
 maximum_node_count: 5
 maximum_total_credentials: 3
-plots_dir: notebooks/output/plots
+plots_dir: $output_plot_dir
 "

 run notebook_dql_transfer -y "
 iteration_count: 500
 training_episode_count: 5
 eval_episode_count: 3
+plots_dir: $output_plot_dir
 "

-run chainnetwork-optionwrapper
-
-run chainnetwork-random -y "
-iterations: 100
-"
-
-run randomnetwork
-
 run notebook_randlookups -y "
 iteration_count: 500
 training_episode_count: 5
 eval_episode_count: 2
+plots_dir: $output_plot_dir
 "

 run notebook_tabularq -y "
-iteration_count: 200
-training_episode_count: 5
-eval_episode_count: 2
 iteration_count: 200
 training_episode_count: 5
 eval_episode_count: 2
+plots_dir: $output_plot_dir
 "

 run notebook_withdefender -y "
 iteration_count: 100
 training_episode_count: 3
-plots_dir: notebooks/output/plots
+plots_dir: $output_plot_dir
 "

 run dql_active_directory -y "
@@ -92,4 +100,5 @@ run dql_active_directory -y "
 iteration_count: 50
 "

+
 popd

@@ -0,0 +1,40 @@
+# Run benchmarking notebooks
+
+set -ex
+
+kernel=$1
+if [ -z "$kernel" ]; then
+    kernel=cybersim
+fi
+
+script_dir=$(dirname "$0")
+
+pushd "$script_dir/.."
+
+output_dir=notebooks/output/benchmark
+output_plot_dir=$output_dir/plots
+
+
+run () {
+    base=$1
+    papermill --kernel $kernel notebooks/$base.ipynb $output_dir/$base.ipynb "${@:2}"
+}
+
+jupyter kernelspec list
+
+mkdir $output_dir -p
+mkdir $output_plot_dir -p
+
+run notebook_benchmark-chain -y "
+gymid: "CyberBattleChain-v0"
+iteration_count: 2000
+training_episode_count: 20
+eval_episode_count: 3
+maximum_node_count: 20
+maximum_total_credentials: 20
+env_size: 14
+plots_dir: $output_plot_dir
+"
+
+
+popd