diff --git a/README.md b/README.md index 4654eb01..fdd55ae5 100644 --- a/README.md +++ b/README.md @@ -33,7 +33,7 @@ of user-defined functions for message auto-handling, cluster provision, and job - [Python == 3.6/3.7](https://www.python.org/downloads/) -## Install MARO from Source ([editable mode](https://pip.pypa.io/en/stable/reference/pip_install/#editable-installs)) +## Install MARO from Source ([Editable Mode](https://pip.pypa.io/en/stable/reference/pip_install/#editable-installs)) - Prerequisites - C++ Compiler diff --git a/docs/source/scenarios/citi_bike.md b/docs/source/scenarios/citi_bike.md index 1c1c1bf8..7cdb36b3 100644 --- a/docs/source/scenarios/citi_bike.md +++ b/docs/source/scenarios/citi_bike.md @@ -144,6 +144,106 @@ topologies, the definition of the bike flow and the trigger mechanism of repositioning actions are the same as those in the toy topologies. We provide this series of topologies to better simulate the actual Citi Bike scenario. +### Naive Baseline + +Below are the final environment metrics of the method *no repositioning* and +*random repositioning* in different topologies. For each experiment, we setup +the environment and test for a duration of 1 week. + +#### No Repositioning + +| Topology | Total Requirement | Resource Shortage | Repositioning Number| +| :-------: | :---------------: | :---------------: | :-----------------: | +| toy.3s_4t | 15,118 | 8,233 | 0 | +| toy.4s_4t | 9,976 | 7,048 | 0 | +| toy.5s_6t | 16,341 | 9,231 | 0 | + +| Topology | Total Requirement | Resource Shortage | Repositioning Number| +| :-------: | :---------------: | :---------------: | :-----------------: | +| ny.201801 | 48,089 | 2,688 | 0 | +| ny.201802 | 126,374 | 8,814 | 0 | +| ny.201803 | 138,952 | 10,942 | 0 | +| ny.201804 | 161,443 | 10,349 | 0 | +| ny.201805 | 323,375 | 29,081 | 0 | +| ny.201806 | 305,971 | 26,412 | 0 | +| ny.201807 | 254,715 | 19,669 | 0 | +| ny.201808 | 302,589 | 26,352 | 0 | +| ny.201809 | 313,002 | 28,472 | 0 | +| ny.201810 | 339,268 | 24,109 | 0 | +| ny.201811 | 263,227 | 21,485 | 0 | +| ny.201812 | 209,102 | 15,876 | 0 | + +| Topology | Total Requirement | Resource Shortage | Repositioning Number| +| :-------: | :---------------: | :---------------: | :-----------------: | +| ny.201901 | 161,474 | 10,775 | 0 | +| ny.201902 | 187,354 | 12,593 | 0 | +| ny.201903 | 148,371 | 7,193 | 0 | +| ny.201904 | 280,852 | 16,906 | 0 | +| ny.201905 | 287,290 | 27,213 | 0 | +| ny.201906 | 379,415 | 33,968 | 0 | +| ny.201907 | 309,365 | 21,105 | 0 | +| ny.201908 | 371,969 | 33,703 | 0 | +| ny.201909 | 344,847 | 24,528 | 0 | +| ny.201910 | 351,855 | 29,544 | 0 | +| ny.201911 | 324,327 | 29,489 | 0 | +| ny.201912 | 184,015 | 14,205 | 0 | + +| Topology | Total Requirement | Resource Shortage | Repositioning Number| +| :-------: | :---------------: | :---------------: | :-----------------: | +| ny.202001 | 169,304 | 12,449 | 0 | +| ny.202002 | 206,105 | 14,794 | 0 | +| ny.202003 | 235,986 | 15,436 | 0 | +| ny.202004 | 91,810 | 2,348 | 0 | +| ny.202005 | 169,412 | 5,231 | 0 | +| ny.202006 | 197,883 | 7,608 | 0 | + +#### Random Repositioning + +| Topology | Total Requirement | Resource Shortage | Repositioning Number| +| :-------: | :---------------: | :---------------: | :-----------------: | +| toy.3s_4t | 15,154 | 8,422 +/- 11 | 449 +/- 22 | +| toy.4s_4t | 10,186 | 4,371 +/- 72 | 3,392 +/- 83 | +| toy.5s_6t | 16,171 | 7,513 +/- 40 | 3,242 +/- 71 | + +| Topology | Total Requirement | Resource Shortage | Repositioning Number| +| :-------: | :---------------: | 
:---------------: | :-----------------: | +| ny.201801 | 48,089 | 6,693 +/- 138 | 22,300 +/- 338 | +| ny.201802 | 126,374 | 21,418 +/- 120 | 22,328 +/- 175 | +| ny.201803 | 138,952 | 22,121 +/- 272 | 22,413 +/- 91 | +| ny.201804 | 161,443 | 22,201 +/- 194 | 22,685 +/- 185 | +| ny.201805 | 323,375 | 54,365 +/- 538 | 23,539 +/- 267 | +| ny.201806 | 305,971 | 49,876 +/- 1,091 | 24,072 +/- 349 | +| ny.201807 | 254,715 | 46,199 +/- 204 | 24,189 +/- 49 | +| ny.201808 | 302,589 | 53,679 +/- 433 | 24,257 +/- 127 | +| ny.201809 | 313,002 | 61,432 +/- 75 | 23,743 +/- 145 | +| ny.201810 | 339,268 | 64,269 +/- 600 | 23,096 +/- 51 | +| ny.201811 | 263,227 | 40,440 +/- 239 | 23,353 +/- 330 | +| ny.201812 | 209,102 | 26,067 +/- 234 | 22,859 +/- 322 | + +| Topology | Total Requirement | Resource Shortage | Repositioning Number| +| :-------: | :---------------: | :---------------: | :-----------------: | +| ny.201901 | 161,474 | 19,295 +/- 155 | 22,222 +/- 114 | +| ny.201902 | 187,354 | 23,875 +/- 282 | 22,844 +/- 18 | +| ny.201903 | 148,371 | 12,451 +/- 312 | 20,461 +/- 270 | +| ny.201904 | 280,852 | 29,591 +/- 170 | 23,234 +/- 307 | +| ny.201905 | 287,290 | 44,199 +/- 542 | 24,254 +/- 307 | +| ny.201906 | 379,415 | 51,396 +/- 256 | 25,175 +/- 237 | +| ny.201907 | 309,365 | 33,861 +/- 643 | 25,022 +/- 215 | +| ny.201908 | 371,969 | 51,319 +/- 417 | 25,834 +/- 70 | +| ny.201909 | 344,847 | 34,532 +/- 466 | 23,848 +/- 197 | +| ny.201910 | 351,855 | 37,828 +/- 502 | 24,807 +/- 208 | +| ny.201911 | 324,327 | 34,745 +/- 427 | 24,230 +/- 439 | +| ny.201912 | 184,015 | 20,119 +/- 110 | 21,866 +/- 296 | + +| Topology | Total Requirement | Resource Shortage | Repositioning Number| +| :-------: | :---------------: | :---------------: | :-----------------: | +| ny.202001 | 169,304 | 17,152 +/- 218 | 23,841 +/- 53 | +| ny.202002 | 206,105 | 24,223 +/- 209 | 24,001 +/- 77 | +| ny.202003 | 235,986 | 23,749 +/- 654 | 22,927 +/- 73 | +| ny.202004 | 91,810 | 3,349 +/- 48 | 16,341 +/- 157 | +| ny.202005 | 169,412 | 10,177 +/- 216 | 18,902 +/- 121 | +| ny.202006 | 197,883 | 11,741 +/- 170 | 17,497 +/- 219 | + ## Quick Start ### Data Preparation @@ -366,105 +466,3 @@ for ep in range(num_episode): Jump to [this notebook](https://github.com/microsoft/maro/blob/master/notebooks/bike_repositioning/interact_with_simulator.ipynb) for a quick experience. - - diff --git a/docs/source/scenarios/container_inventory_management.md b/docs/source/scenarios/container_inventory_management.md index 08ef6473..857455b1 100644 --- a/docs/source/scenarios/container_inventory_management.md +++ b/docs/source/scenarios/container_inventory_management.md @@ -128,6 +128,112 @@ manually. *(To make it clearer, the figure above only shows the service routes among ports.)* +### Naive Baseline + +Below are the final environment metrics of the method *no repositioning* and +*random repositioning* in different topologies. For each experiment, we setup +the environment and test for a duration of 1120 ticks (days). 
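+
+The *no repositioning* baseline can be reproduced with a plain roll-out that never
+issues an action. Below is a minimal sketch (the scenario/topology names and the
+`Env` step interface are assumed to match the Quick Start section of this page;
+treat them as placeholders and adjust to the version you have installed):
+
+```python
+from maro.simulator import Env
+
+# Roll out a single episode of 1120 ticks (days) in one topology.
+env = Env(scenario="cim", topology="toy.4p_ssdd_l0.0", durations=1120)
+
+metrics, decision_event, is_done = env.step(None)
+while not is_done:
+    # A None action means "do nothing" at each decision event,
+    # which is exactly the no-repositioning baseline.
+    metrics, decision_event, is_done = env.step(None)
+
+# Final environment metrics (total requirement, resource shortage,
+# repositioning number), as reported in the tables below.
+print(metrics)
+```
+
+Roughly speaking, the *random repositioning* baseline differs only in that a random
+action within each decision event's action scope is issued instead of `None`.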
+ +#### No Repositioning + +| Topology | Total Requirement | Resource Shortage | Repositioning Number| +| :--------------: | :---------------: | :---------------: | :-----------------: | +| toy.4p_ssdd_l0.0 | 2,240,000 | 2,190,000 | 0 | +| toy.4p_ssdd_l0.1 | 2,240,000 | 2,190,000 | 0 | +| toy.4p_ssdd_l0.2 | 2,240,000 | 2,190,000 | 0 | +| toy.4p_ssdd_l0.3 | 2,239,460 | 2,189,460 | 0 | +| toy.4p_ssdd_l0.4 | 2,244,068 | 2,194,068 | 0 | +| toy.4p_ssdd_l0.5 | 2,244,068 | 2,194,068 | 0 | +| toy.4p_ssdd_l0.6 | 2,244,068 | 2,194,068 | 0 | +| toy.4p_ssdd_l0.7 | 2,244,068 | 2,194,068 | 0 | +| toy.4p_ssdd_l0.8 | 2,241,716 | 2,191,716 | 0 | + +| Topology | Total Requirement | Resource Shortage | Repositioning Number| +| :---------------: | :---------------: | :---------------: | :-----------------: | +| toy.5p_ssddd_l0.0 | 2,240,000 | 2,140,000 | 0 | +| toy.5p_ssddd_l0.1 | 2,240,000 | 2,140,000 | 0 | +| toy.5p_ssddd_l0.2 | 2,240,000 | 2,140,000 | 0 | +| toy.5p_ssddd_l0.3 | 2,239,460 | 2,139,460 | 0 | +| toy.5p_ssddd_l0.4 | 2,244,068 | 2,144,068 | 0 | +| toy.5p_ssddd_l0.5 | 2,244,068 | 2,144,068 | 0 | +| toy.5p_ssddd_l0.6 | 2,244,068 | 2,144,068 | 0 | +| toy.5p_ssddd_l0.7 | 2,244,068 | 2,144,068 | 0 | +| toy.5p_ssddd_l0.8 | 2,241,716 | 2,141,716 | 0 | + +| Topology | Total Requirement | Resource Shortage | Repositioning Number| +| :----------------: | :---------------: | :---------------: | :-----------------: | +| toy.6p_sssbdd_l0.0 | 2,240,000 | 2,087,000 | 0 | +| toy.6p_sssbdd_l0.1 | 2,240,000 | 2,087,000 | 0 | +| toy.6p_sssbdd_l0.2 | 2,240,000 | 2,087,000 | 0 | +| toy.6p_sssbdd_l0.3 | 2,239,460 | 2,086,460 | 0 | +| toy.6p_sssbdd_l0.4 | 2,244,068 | 2,091,068 | 0 | +| toy.6p_sssbdd_l0.5 | 2,244,068 | 2,091,068 | 0 | +| toy.6p_sssbdd_l0.6 | 2,244,068 | 2,091,068 | 0 | +| toy.6p_sssbdd_l0.7 | 2,244,068 | 2,091,068 | 0 | +| toy.6p_sssbdd_l0.8 | 2,241,716 | 2,088,716 | 0 | + +| Topology | Total Requirement | Resource Shortage | Repositioning Number| +| :-------------------: | :---------------: | :---------------: | :-----------------: | +| global_trade.22p_l0.0 | 2,240,000 | 1,028,481 | 0 | +| global_trade.22p_l0.1 | 2,240,000 | 1,081,935 | 0 | +| global_trade.22p_l0.2 | 2,240,000 | 1,083,358 | 0 | +| global_trade.22p_l0.3 | 2,239,460 | 1,085,212 | 0 | +| global_trade.22p_l0.4 | 2,244,068 | 1,089,628 | 0 | +| global_trade.22p_l0.5 | 2,244,068 | 1,102,913 | 0 | +| global_trade.22p_l0.6 | 2,244,068 | 1,122,092 | 0 | +| global_trade.22p_l0.7 | 2,244,068 | 1,162,108 | 0 | +| global_trade.22p_l0.8 | 2,241,716 | 1,161,714 | 0 | + +#### Random Repositioning + +| Topology | Total Requirement | Resource Shortage | Repositioning Number| +| :--------------: | :---------------: | :-------------------: | :-----------------: | +| toy.4p_ssdd_l0.0 | 2,240,000 | 1,497,138 +/- 30,423 | 209,254 +/- 9,257 | +| toy.4p_ssdd_l0.1 | 2,240,000 | 1,623,710 +/- 36,421 | 100,918 +/- 1,835 | +| toy.4p_ssdd_l0.2 | 2,240,000 | 1,501,466 +/- 48,566 | 107,259 +/- 4,015 | +| toy.4p_ssdd_l0.3 | 2,239,460 | 1,577,011 +/- 35,109 | 104,925 +/- 1,756 | +| toy.4p_ssdd_l0.4 | 2,244,068 | 1,501,835 +/- 103,196 | 109,024 +/- 1,651 | +| toy.4p_ssdd_l0.5 | 2,244,068 | 1,546,227 +/- 81,107 | 103,866 +/- 5,687 | +| toy.4p_ssdd_l0.6 | 2,244,068 | 1,578,863 +/- 127,815 | 111,036 +/- 5,333 | +| toy.4p_ssdd_l0.7 | 2,244,068 | 1,519,495 +/- 60,555 | 122,074 +/- 3,985 | +| toy.4p_ssdd_l0.8 | 2,241,716 | 1,603,063 +/- 109,149 | 125,946 +/- 9,660 | + +| Topology | Total Requirement | Resource Shortage | Repositioning Number| +| :---------------: | 
:---------------: | :-------------------: | :-----------------: | +| toy.5p_ssddd_l0.0 | 2,240,000 | 1,371,021 +/- 34,619 | 198,306 +/- 6,948 | +| toy.5p_ssddd_l0.1 | 2,240,000 | 1,720,068 +/- 18,939 | 77,514 +/- 1,280 | +| toy.5p_ssddd_l0.2 | 2,240,000 | 1,716,435 +/- 15,499 | 74,843 +/- 1,563 | +| toy.5p_ssddd_l0.3 | 2,239,460 | 1,700,456 +/- 26,510 | 79,332 +/- 575 | +| toy.5p_ssddd_l0.4 | 2,244,068 | 1,663,139 +/- 34,244 | 79,708 +/- 5,152 | +| toy.5p_ssddd_l0.5 | 2,244,068 | 1,681,519 +/- 107,863 | 81,768 +/- 3,094 | +| toy.5p_ssddd_l0.6 | 2,244,068 | 1,660,330 +/- 38,318 | 81,503 +/- 4,079 | +| toy.5p_ssddd_l0.7 | 2,244,068 | 1,709,022 +/- 31,440 | 92,717 +/- 8,354 | +| toy.5p_ssddd_l0.8 | 2,241,716 | 1,763,950 +/- 73,935 | 92,921 +/- 3,034 | + +| Topology | Total Requirement | Resource Shortage | Repositioning Number| +| :----------------: | :---------------: | :------------------: | :-----------------: | +| toy.6p_sssbdd_l0.0 | 2,240,000 | 1,529,774 +/- 73,104 | 199,478 +/- 11,637 | +| toy.6p_sssbdd_l0.1 | 2,240,000 | 1,736,385 +/- 16,736 | 56,106 +/- 1,448 | +| toy.6p_sssbdd_l0.2 | 2,240,000 | 1,765,945 +/- 4,680 | 52,626 +/- 2,201 | +| toy.6p_sssbdd_l0.3 | 2,239,460 | 1,811,987 +/- 15,436 | 49,937 +/- 3,484 | +| toy.6p_sssbdd_l0.4 | 2,244,068 | 1,783,362 +/- 39,122 | 52,993 +/- 2,455 | +| toy.6p_sssbdd_l0.5 | 2,244,068 | 1,755,551 +/- 44,855 | 55,055 +/- 2,759 | +| toy.6p_sssbdd_l0.6 | 2,244,068 | 1,830,504 +/- 10,690 | 57,083 +/- 526 | +| toy.6p_sssbdd_l0.7 | 2,244,068 | 1,742,129 +/- 23,910 | 65,571 +/- 3,228 | +| toy.6p_sssbdd_l0.8 | 2,241,716 | 1,761,283 +/- 22,338 | 66,827 +/- 1,501 | + +| Topology | Total Requirement | Resource Shortage | Repositioning Number| +| :-------------------: | :---------------: | :------------------: | :-----------------: | +| global_trade.22p_l0.0 | 2,240,000 | 1,010,009 +/- 20,942 | 27,412 +/- 730 | +| global_trade.22p_l0.1 | 2,240,000 | 1,027,395 +/- 19,183 | 9,408 +/- 647 | +| global_trade.22p_l0.2 | 2,240,000 | 1,035,851 +/- 4,352 | 9,062 +/- 262 | +| global_trade.22p_l0.3 | 2,239,460 | 1,032,480 +/- 1,332 | 9,511 +/- 446 | +| global_trade.22p_l0.4 | 2,244,068 | 1,034,412 +/- 11,689 | 9,304 +/- 314 | +| global_trade.22p_l0.5 | 2,244,068 | 1,042,869 +/- 16,146 | 9,436 +/- 394 | +| global_trade.22p_l0.6 | 2,244,068 | 1,096,502 +/- 26,896 | 15,114 +/- 1,377 | +| global_trade.22p_l0.7 | 2,244,068 | 1,144,981 +/- 5,355 | 14,176 +/- 1,285 | +| global_trade.22p_l0.8 | 2,241,716 | 1,154,184 +/- 7,043 | 13,548 +/- 112 | + ## Quick Start ### Data Preparation @@ -240,111 +346,3 @@ for ep in range(num_episode): Jump to [this notebook](https://github.com/microsoft/maro/blob/master/notebooks/container_inventory_management/interact_with_simulator.ipynb) for a quick experience. - - diff --git a/examples/cim/dqn/components/agent_manager.py b/examples/cim/dqn/components/agent_manager.py index dd2e3d6e..b4b90101 100644 --- a/examples/cim/dqn/components/agent_manager.py +++ b/examples/cim/dqn/components/agent_manager.py @@ -1,43 +1,35 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT license. 
-import io -import yaml - from torch.nn.functional import smooth_l1_loss from torch.optim import RMSprop +from agent import CIMAgent +from config import config from maro.rl import AbsAgentManager, LearningModel, MLPDecisionLayers, DQN, DQNHyperParams, ColumnBasedStore -from maro.utils import convert_dottable, set_seeds -from .agent import CIMAgent - - -with io.open("config.yml", "r") as in_file: - raw_config = yaml.safe_load(in_file) - config = convert_dottable(raw_config) - config = config.agents +from maro.utils import set_seeds class DQNAgentManager(AbsAgentManager): def _assemble(self, agent_dict): - set_seeds(config.seed) - num_actions = config.algorithm.num_actions + set_seeds(config.agents.seed) + num_actions = config.agents.algorithm.num_actions for agent_id in self._agent_id_list: eval_model = LearningModel(decision_layers=MLPDecisionLayers(name=f'{agent_id}.policy', input_dim=self._state_shaper.dim, output_dim=num_actions, - **config.algorithm.model) + **config.agents.algorithm.model) ) algorithm = DQN(model_dict={"eval": eval_model}, - optimizer_opt=(RMSprop, config.algorithm.optimizer), + optimizer_opt=(RMSprop, config.agents.algorithm.optimizer), loss_func_dict={"eval": smooth_l1_loss}, - hyper_params=DQNHyperParams(**config.algorithm.hyper_parameters, + hyper_params=DQNHyperParams(**config.agents.algorithm.hyper_parameters, num_actions=num_actions)) - experience_pool = ColumnBasedStore(**config.experience_pool) + experience_pool = ColumnBasedStore(**config.agents.experience_pool) agent_dict[agent_id] = CIMAgent(name=agent_id, algorithm=algorithm, experience_pool=experience_pool, - **config.training_loop_parameters) + **config.agents.training_loop_parameters) def store_experiences(self, experiences): for agent_id, exp in experiences.items(): diff --git a/examples/cim/dqn/components/config.py b/examples/cim/dqn/components/config.py new file mode 100644 index 00000000..d36d60dd --- /dev/null +++ b/examples/cim/dqn/components/config.py @@ -0,0 +1,18 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT license. + +""" +This file is used to load config and convert it into a dotted dictionary. +""" + +import io +import os +import yaml + +from maro.utils import convert_dottable + + +CONFIG_PATH = os.path.join(os.path.split(os.path.realpath(__file__))[0], "../config.yml") +with io.open(CONFIG_PATH, "r") as in_file: + raw_config = yaml.safe_load(in_file) + config = convert_dottable(raw_config) diff --git a/examples/cim/dqn/components/dist_actor.py b/examples/cim/dqn/components/dist_actor.py index ca0aa713..5491c8b3 100644 --- a/examples/cim/dqn/components/dist_actor.py +++ b/examples/cim/dqn/components/dist_actor.py @@ -1,24 +1,17 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT license. 
-import io -import yaml - import numpy as np from maro.simulator import Env from maro.rl import AgentMode, SimpleActor, ActorWorker, KStepExperienceShaper, TwoPhaseLinearExplorer -from maro.utils import convert_dottable -from examples.cim.dqn.components.state_shaper import CIMStateShaper -from examples.cim.dqn.components.action_shaper import CIMActionShaper -from examples.cim.dqn.components.experience_shaper import TruncatedExperienceShaper -from examples.cim.dqn.components.agent_manager import DQNAgentManager +from config import config +from state_shaper import CIMStateShaper +from action_shaper import CIMActionShaper +from experience_shaper import TruncatedExperienceShaper +from agent_manager import DQNAgentManager -with io.open("config.yml", "r") as in_file: - raw_config = yaml.safe_load(in_file) - config = convert_dottable(raw_config) - if __name__ == "__main__": env = Env(config.env.scenario, config.env.topology, durations=config.env.durations) agent_id_list = [str(agent_id) for agent_id in env.agent_idx_list] diff --git a/examples/cim/dqn/components/dist_learner.py b/examples/cim/dqn/components/dist_learner.py index fac527cd..edc54913 100644 --- a/examples/cim/dqn/components/dist_learner.py +++ b/examples/cim/dqn/components/dist_learner.py @@ -2,19 +2,13 @@ # Licensed under the MIT license. import os -import io -import yaml -from maro.simulator import Env +from agent_manager import DQNAgentManager +from config import config from maro.rl import ActorProxy, SimpleLearner, AgentMode, TwoPhaseLinearExplorer -from examples.cim.dqn.components.state_shaper import CIMStateShaper -from maro.utils import Logger, convert_dottable -from examples.cim.dqn.components.agent_manager import DQNAgentManager - - -with io.open("config.yml", "r") as in_file: - raw_config = yaml.safe_load(in_file) - config = convert_dottable(raw_config) +from maro.simulator import Env +from maro.utils import Logger +from state_shaper import CIMStateShaper if __name__ == "__main__": diff --git a/examples/cim/dqn/multi_process_launcher.py b/examples/cim/dqn/multi_process_launcher.py index 512abbe0..a3111e29 100644 --- a/examples/cim/dqn/multi_process_launcher.py +++ b/examples/cim/dqn/multi_process_launcher.py @@ -7,11 +7,15 @@ This script is used to debug distributed algorithm in single host multi-process import os -ACTOR_NUM = 1 # must be same as in config -LEARNER_NUM = 1 +from components.config import config +from maro.utils import Logger, convert_dottable -learner_path = "components/dist_learner.py &" -actor_path = "components/dist_actor.py &" + +ACTOR_NUM = config.distributed.learner.peer["actor_worker"] # must be same as in config +LEARNER_NUM = config.distributed.actor.peer["actor"] + +learner_path = f"{os.path.split(os.path.realpath(__file__))[0]}/components/dist_learner.py &" +actor_path = f"{os.path.split(os.path.realpath(__file__))[0]}/components/dist_actor.py &" for l_num in range(LEARNER_NUM): os.system(f"python " + learner_path) diff --git a/examples/cim/dqn/single_process_launcher.py b/examples/cim/dqn/single_process_launcher.py index f026e3c5..f13e8e28 100644 --- a/examples/cim/dqn/single_process_launcher.py +++ b/examples/cim/dqn/single_process_launcher.py @@ -29,7 +29,8 @@ if __name__ == "__main__": if config.experience_shaping.type == "truncated": experience_shaper = TruncatedExperienceShaper(**config.experience_shaping.truncated) else: - experience_shaper = KStepExperienceShaper(reward_func=lambda mt: mt["perf"], **config.experience_shaping.k_step) + experience_shaper = 
KStepExperienceShaper(reward_func=lambda mt: 1-mt["container_shortage"]/mt["order_requirements"], + **config.experience_shaping.k_step) exploration_config = {"epsilon_range_dict": {"_all_": config.exploration.epsilon_range}, "split_point_dict": {"_all_": config.exploration.split_point}, diff --git a/notebooks/container_inventory_management/ECR_tutorial.ipynb b/notebooks/container_inventory_management/ECR_tutorial.ipynb deleted file mode 100644 index 521c599e..00000000 --- a/notebooks/container_inventory_management/ECR_tutorial.ipynb +++ /dev/null @@ -1,267 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# state shaper\n", - "\n", - "A state shaper is used to convert an environment observation to a state vector as input to value or policy models by extracting relevant temporal and spatial information. The scenario-specific __call__ method returns the the ID of the agent involved in the current decision event and the shaped state. " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import numpy as np\n", - "from maro.rl import AbstractStateShaper\n", - "\n", - "\n", - "class ECRStateShaper(AbstractStateShaper):\n", - " def __init__(self, *, look_back, max_ports_downstream, port_attributes, vessel_attributes):\n", - " super().__init__()\n", - " self._look_back = look_back\n", - " self._max_ports_downstream = max_ports_downstream\n", - " self._port_attributes = port_attributes\n", - " self._vessel_attributes = vessel_attributes\n", - " self._dim = (look_back + 1) * (max_ports_downstream + 1) * len(port_attributes) + len(vessel_attributes)\n", - "\n", - " def __call__(self, decision_event, snapshot_list):\n", - " tick, port_idx, vessel_idx = decision_event.tick, decision_event.port_idx, decision_event.vessel_idx\n", - " ticks = [tick - rt for rt in range(self._look_back-1)]\n", - " future_port_idx_list = snapshot_list[\"vessels\"][tick: vessel_idx: 'future_stop_list'].astype('int')\n", - " port_features = snapshot_list[\"ports\"][ticks: [port_idx] + list(future_port_idx_list): self._port_attributes]\n", - " vessel_features = snapshot_list[\"vessels\"][tick: vessel_idx: self._vessel_attributes]\n", - " state = np.concatenate((port_features, vessel_features))\n", - " return str(port_idx), state\n", - " \n", - " @property\n", - " def dim(self):\n", - " return self._dim" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# action shaper\n", - "\n", - "An action shaper is used to convert the output of an underlying algorithm's choose_action() method to an Action object which can be executed by the env's step() method." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from maro.rl import AbstractActionShaper\n", - "from maro.simulator.scenarios.ecr.common import Action\n", - "\n", - "\n", - "class ECRActionShaper(AbstractActionShaper):\n", - " def __init__(self, action_space):\n", - " super().__init__()\n", - " self._action_space = action_space\n", - " self._zero_action_index = action_space.index(0)\n", - "\n", - " def __call__(self, model_action, decision_event, snapshot_list):\n", - " scope = decision_event.action_scope\n", - " tick = decision_event.tick\n", - " port_idx = decision_event.port_idx\n", - " vessel_idx = decision_event.vessel_idx\n", - "\n", - " port_empty = snapshot_list[\"ports\"][tick: port_idx: [\"empty\", \"full\", \"on_shipper\", \"on_consignee\"]][0]\n", - " vessel_remaining_space = snapshot_list[\"vessels\"][tick: vessel_idx: [\"empty\", \"full\", \"remaining_space\"]][2]\n", - " early_discharge = snapshot_list[\"vessels\"][tick:vessel_idx: \"early_discharge\"][0]\n", - " assert 0 <= model_action < len(self._action_space)\n", - "\n", - " if model_action < self._zero_action_index:\n", - " actual_action = max(round(self._action_space[model_action] * port_empty), -vessel_remaining_space)\n", - " elif model_action > self._zero_action_index:\n", - " plan_action = self._action_space[model_action] * (scope.discharge + early_discharge) - early_discharge\n", - " actual_action = round(plan_action) if plan_action > 0 else round(self._action_space[model_action] * scope.discharge)\n", - " else:\n", - " actual_action = 0\n", - "\n", - " return Action(vessel_idx, port_idx, actual_action)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# reward shaper\n", - "\n", - "A reward shaper is used to record transitions during a roll-out episode and perform necessary post-processing at the end of the episode. The post-processing logic is encapsulated in the abstract shape() method and needs to be implemented for each scenario. It is necessary to compute rewards and next-states (and also next-actions for SARSA-like on-policy algorithms) during post-processing as they are set to None during the episode. In particular, it is necessary to specify how to determine the reward for an action given the business metrics associated with the corresponding transition. MARO provides the KStepRewardShaper class which may be combined with a user-defined reward function to form a default reward shaper. Here we showcase a custom reward shaper for the ECR scenario. 
" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import pickle\n", - "from maro.rl import AbstractRewardShaper, ExperienceKey, ExperienceInfoKey\n", - "\n", - "\n", - "class ECRRewardShaper(AbstractRewardShaper):\n", - " def __init__(self, *, agent_id_list, time_window: int, time_decay_factor: float,\n", - " fulfillment_factor: float, shortage_factor: float):\n", - " super().__init__()\n", - " self._agent_id_list = agent_id_list\n", - " self._time_window = time_window\n", - " self._time_decay_factor = time_decay_factor\n", - " self._fulfillment_factor = fulfillment_factor\n", - " self._shortage_factor = shortage_factor\n", - "\n", - " def _shape(self, snapshot_list):\n", - " for i in range(len(self._trajectory[ExperienceKey.STATE])-1):\n", - " metrics = self._trajectory[ExperienceKey.INFO][i][ExperienceInfoKey.METRICS]\n", - " event = pickle.loads(self._trajectory[ExperienceKey.INFO][i][ExperienceInfoKey.EVENT])\n", - " self._trajectory[ExperienceKey.REWARD][i] = self._compute_reward(metrics, event, snapshot_list)\n", - " self._trajectory[ExperienceKey.NEXT_STATE][i] = self._trajectory[ExperienceKey.STATE][i+1]\n", - " self._trajectory[ExperienceKey.NEXT_ACTION][i] = self._trajectory[ExperienceKey.ACTION][i+1]\n", - " self._trajectory[ExperienceKey.INFO][i][ExperienceInfoKey.DISCOUNT] = .0\n", - "\n", - " def _compute_reward(self, metrics, decision_event, snapshot_list):\n", - " start_tick = decision_event.tick + 1\n", - " end_tick = decision_event.tick + self._time_window\n", - " ticks = list(range(start_tick, end_tick))\n", - "\n", - " # calculate tc reward\n", - " decay_list = [self._time_decay_factor ** i for i in range(end_tick - start_tick)\n", - " for _ in range(len(self._agent_id_list))]\n", - "\n", - " tot_fulfillment = np.dot(snapshot_list[\"ports\"][ticks::\"fulfillment\"], decay_list)\n", - " tot_shortage = np.dot(snapshot_list[\"ports\"][ticks::\"shortage\"], decay_list)\n", - "\n", - " return np.float(self._fulfillment_factor * tot_fulfillment - self._shortage_factor * tot_shortage)\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# agent manager\n", - "\n", - "An agent manager manages all agents and provides a unified interface with the environment. 
It is composed of a state shaper and an action shaper which perform necessary conversions so that the underlying agents do not need to concern themselves with the business logic; " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from torch.nn.functional import smooth_l1_loss\n", - "from torch.optim import RMSprop\n", - "\n", - "from maro.rl import AgentManager, Agent, AgentParameters, LearningModel, MLPDecisionLayers, DQN, DQNHyperParams, \\\n", - " ExperienceInfoKey\n", - "\n", - "num_actions = 21\n", - "model_config = {\"hidden_dims\": [256, 128, 64], \"output_dim\": num_actions, \"dropout_p\": 0.0}\n", - "optimizer_config = {\"lr\": 0.05}\n", - "dqn_config = {\"num_actions\": num_actions, \"replace_target_frequency\": 5, \"tau\": 0.1}\n", - "training_config = {\"min_experiences_to_train\": 1024, \"samplers\": [(lambda d: d[ExperienceInfoKey.TD_ERROR], 128)],\n", - " \"num_steps\": 10}\n", - "\n", - "\n", - "class DQNAgentManager(AgentManager):\n", - " def _assemble_agents(self):\n", - " agent_params = AgentParameters(**training_config)\n", - " for agent_id in self._agent_id_list:\n", - " eval_model = LearningModel(decision_layers=MLPDecisionLayers(name=f'{agent_id}.policy',\n", - " input_dim=self._state_shaper.dim,\n", - " **model_config)\n", - " )\n", - "\n", - " algorithm = DQN(model_dict={\"eval\": eval_model}, optimizer_opt=(RMSprop, optimizer_config),\n", - " loss_func_dict={\"eval\": smooth_l1_loss}, hyper_params=DQNHyperParams(**dqn_config))\n", - "\n", - " self._agent_dict[agent_id] = Agent(name=agent_id, algorithm=algorithm, params=agent_params)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# main loop\n", - "\n", - "The code below demonstrates the typical structure of a program using MARO. One starts by creating an environment. Next, shapers and an explorer are created and an agent manager is created by loading these components. The creation of the agent manager also assembles all agents under the hood. Because the code is for the single-host mode, the agent manager mode is set to TRAIN_INFERENCE. Next, an actor is created to wrap the env and agent manager, and a learner is created to wrap the same agent manager and the actor. Finally, the task is started by calling the learner's train_test() method. 
" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from maro.simulator import Env\n", - "from maro.rl import SimpleLearner, SimpleActor, AgentMode, KStepRewardShaper, TwoPhaseLinearExplorer\n", - "from maro.utils import Logger, convert_dottable\n", - "\n", - "\n", - "total_episodes = 100\n", - "\n", - "env = Env(scenario=\"ecr\", topology=\"toy.5p_ssddd_l0.0\", durations=1120)\n", - "agent_id_list = [str(agent_id) for agent_id in env.agent_idx_list]\n", - "state_shaper = ECRStateShaper(look_back=7, max_ports_downstream=2,\n", - " port_attributes=[\"empty\",\"full\",\"on_shipper\",\"on_consignee\",\"booking\",\"shortage\",\"fulfillment\"],\n", - " vessel_attributes=[\"empty\",\"full\", \"remaining_space\"]\n", - " )\n", - "action_shaper = ECRActionShaper(action_space=list(np.linspace(-1.0, 1.0, num_actions)))\n", - "reward_shaper = ECRRewardShaper(agent_id_list=agent_id_list, time_window=100, fulfillment_factor=1.0,\n", - " shortage_factor=1.0, time_decay_factor=0.97)\n", - "explorer = TwoPhaseLinearExplorer(agent_id_list, total_episodes, \n", - " epsilon_range_dict={\"_all_\": (.0, .4)},\n", - " split_point_dict={\"_all_\": (.5, .8)}\n", - " )\n", - "agent_manager = DQNAgentManager(name=\"ecr_learner\",\n", - " mode=AgentMode.TRAIN_INFERENCE,\n", - " agent_id_list=agent_id_list,\n", - " state_shaper=state_shaper,\n", - " action_shaper=action_shaper,\n", - " reward_shaper=reward_shaper,\n", - " explorer=explorer)\n", - "learner = SimpleLearner(trainable_agents=agent_manager,\n", - " actor=SimpleActor(env=env, inference_agents=agent_manager),\n", - " logger=Logger(\"single_host_ecr_learner\", auto_timestamp=False))\n", - "\n", - "learner.train_test(total_episodes)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.7" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -}