Mirror of https://github.com/microsoft/FQF.git
add option for directbp and sqloss
This commit is contained in:
Parent: a8403a7462
Commit: 4d9b20e6ac
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,46 @@
# Hyperparameters follow Dabney et al. (2018).
import dopamine.agents.fqf.fqf_agent
import dopamine.agents.rainbow.rainbow_agent
import dopamine.discrete_domains.atari_lib
import dopamine.discrete_domains.run_experiment
import dopamine.replay_memory.prioritized_replay_buffer
import gin.tf.external_configurables

FQFAgent.kappa = 1.0
FQFAgent.num_tau_samples = 32
FQFAgent.num_tau_prime_samples = 32
FQFAgent.num_quantile_samples = 32
FQFAgent.runtype = 'iqn_fqf-ws-directbp-rmsprop-f0.000001-e0.00001-s0'
FQFAgent.fqf_factor = '0.000001'
FQFAgent.fqf_ent = '0.00001'
RainbowAgent.gamma = 0.99
RainbowAgent.game = 'Centipede'
RainbowAgent.runtype = 'iqn_fqf-ws-directbp-rmsprop-f0.000001-e0.00001-s0'
RainbowAgent.update_horizon = 1
RainbowAgent.min_replay_history = 50000  # agent steps
RainbowAgent.update_period = 4
RainbowAgent.target_update_period = 10000  # agent steps
RainbowAgent.epsilon_train = 0.01
RainbowAgent.epsilon_eval = 0.001
RainbowAgent.epsilon_decay_period = 1000000  # agent steps
RainbowAgent.replay_scheme = 'uniform'
RainbowAgent.tf_device = '/gpu:0'  # use '/cpu:*' for non-GPU version
RainbowAgent.optimizer = @tf.train.AdamOptimizer()

tf.train.AdamOptimizer.learning_rate = 0.00005
tf.train.AdamOptimizer.epsilon = 0.0003125

atari_lib.create_atari_environment.game_name = 'Centipede'
atari_lib.create_atari_environment.sticky_actions = False
create_agent.agent_name = 'implicit_quantile'
Runner.num_iterations = 200
Runner.game = 'Centipede'
Runner.runtype = 'iqn_fqf-ws-directbp-rmsprop-f0.000001-e0.00001-s0'
Runner.training_steps = 250000
Runner.evaluation_steps = 125000
Runner.max_steps_per_episode = 27000

AtariPreprocessing.terminal_on_life_loss = True

WrappedPrioritizedReplayBuffer.replay_capacity = 1000000
WrappedPrioritizedReplayBuffer.batch_size = 32
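Note that fqf_factor and fqf_ent are bound as quoted strings, so the agent must convert them before use. A minimal sketch of how such string-valued gin bindings are consumed, using a hypothetical stand-in for the repository's FQFAgent:

# Sketch only: the FQFAgent below is a stand-in, not the repository's class.
import gin

@gin.configurable
class FQFAgent(object):
  def __init__(self, fqf_factor='1.0', fqf_ent='0.0'):
    # The gin files quote these values, so they arrive as strings.
    self.fqf_factor = float(fqf_factor)  # 1e-06 for this config
    self.fqf_ent = float(fqf_ent)        # 1e-05 for this config

gin.parse_config("""
FQFAgent.fqf_factor = '0.000001'
FQFAgent.fqf_ent = '0.00001'
""")
agent = FQFAgent()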
@@ -0,0 +1,46 @@
# Hyperparameters follow Dabney et al. (2018).
import dopamine.agents.fqf.fqf_agent
import dopamine.agents.rainbow.rainbow_agent
import dopamine.discrete_domains.atari_lib
import dopamine.discrete_domains.run_experiment
import dopamine.replay_memory.prioritized_replay_buffer
import gin.tf.external_configurables

FQFAgent.kappa = 1.0
FQFAgent.num_tau_samples = 32
FQFAgent.num_tau_prime_samples = 32
FQFAgent.num_quantile_samples = 32
FQFAgent.runtype = 'iqn_fqf-ws-directbp-rmsprop-f0.000001-e0.0001-s0'
FQFAgent.fqf_factor = '0.000001'
FQFAgent.fqf_ent = '0.0001'
RainbowAgent.gamma = 0.99
RainbowAgent.game = 'Centipede'
RainbowAgent.runtype = 'iqn_fqf-ws-directbp-rmsprop-f0.000001-e0.0001-s0'
RainbowAgent.update_horizon = 1
RainbowAgent.min_replay_history = 50000  # agent steps
RainbowAgent.update_period = 4
RainbowAgent.target_update_period = 10000  # agent steps
RainbowAgent.epsilon_train = 0.01
RainbowAgent.epsilon_eval = 0.001
RainbowAgent.epsilon_decay_period = 1000000  # agent steps
RainbowAgent.replay_scheme = 'uniform'
RainbowAgent.tf_device = '/gpu:0'  # use '/cpu:*' for non-GPU version
RainbowAgent.optimizer = @tf.train.AdamOptimizer()

tf.train.AdamOptimizer.learning_rate = 0.00005
tf.train.AdamOptimizer.epsilon = 0.0003125

atari_lib.create_atari_environment.game_name = 'Centipede'
atari_lib.create_atari_environment.sticky_actions = False
create_agent.agent_name = 'implicit_quantile'
Runner.num_iterations = 200
Runner.game = 'Centipede'
Runner.runtype = 'iqn_fqf-ws-directbp-rmsprop-f0.000001-e0.0001-s0'
Runner.training_steps = 250000
Runner.evaluation_steps = 125000
Runner.max_steps_per_episode = 27000

AtariPreprocessing.terminal_on_life_loss = True

WrappedPrioritizedReplayBuffer.replay_capacity = 1000000
WrappedPrioritizedReplayBuffer.batch_size = 32
@@ -0,0 +1,46 @@
# Hyperparameters follow Dabney et al. (2018).
import dopamine.agents.fqf.fqf_agent
import dopamine.agents.rainbow.rainbow_agent
import dopamine.discrete_domains.atari_lib
import dopamine.discrete_domains.run_experiment
import dopamine.replay_memory.prioritized_replay_buffer
import gin.tf.external_configurables

FQFAgent.kappa = 1.0
FQFAgent.num_tau_samples = 32
FQFAgent.num_tau_prime_samples = 32
FQFAgent.num_quantile_samples = 32
FQFAgent.runtype = 'iqn_fqf-ws-directbp-rmsprop-f0.00001-e0.00001-s0'
FQFAgent.fqf_factor = '0.00001'
FQFAgent.fqf_ent = '0.00001'
RainbowAgent.gamma = 0.99
RainbowAgent.game = 'Centipede'
RainbowAgent.runtype = 'iqn_fqf-ws-directbp-rmsprop-f0.00001-e0.00001-s0'
RainbowAgent.update_horizon = 1
RainbowAgent.min_replay_history = 50000  # agent steps
RainbowAgent.update_period = 4
RainbowAgent.target_update_period = 10000  # agent steps
RainbowAgent.epsilon_train = 0.01
RainbowAgent.epsilon_eval = 0.001
RainbowAgent.epsilon_decay_period = 1000000  # agent steps
RainbowAgent.replay_scheme = 'uniform'
RainbowAgent.tf_device = '/gpu:0'  # use '/cpu:*' for non-GPU version
RainbowAgent.optimizer = @tf.train.AdamOptimizer()

tf.train.AdamOptimizer.learning_rate = 0.00005
tf.train.AdamOptimizer.epsilon = 0.0003125

atari_lib.create_atari_environment.game_name = 'Centipede'
atari_lib.create_atari_environment.sticky_actions = False
create_agent.agent_name = 'implicit_quantile'
Runner.num_iterations = 200
Runner.game = 'Centipede'
Runner.runtype = 'iqn_fqf-ws-directbp-rmsprop-f0.00001-e0.00001-s0'
Runner.training_steps = 250000
Runner.evaluation_steps = 125000
Runner.max_steps_per_episode = 27000

AtariPreprocessing.terminal_on_life_loss = True

WrappedPrioritizedReplayBuffer.replay_capacity = 1000000
WrappedPrioritizedReplayBuffer.batch_size = 32
@@ -0,0 +1,46 @@
# Hyperparameters follow Dabney et al. (2018).
import dopamine.agents.fqf.fqf_agent
import dopamine.agents.rainbow.rainbow_agent
import dopamine.discrete_domains.atari_lib
import dopamine.discrete_domains.run_experiment
import dopamine.replay_memory.prioritized_replay_buffer
import gin.tf.external_configurables

FQFAgent.kappa = 1.0
FQFAgent.num_tau_samples = 32
FQFAgent.num_tau_prime_samples = 32
FQFAgent.num_quantile_samples = 32
FQFAgent.runtype = 'iqn_fqf-ws-directbp-rmsprop-f0.00001-e0.0001-s0'
FQFAgent.fqf_factor = '0.00001'
FQFAgent.fqf_ent = '0.0001'
RainbowAgent.gamma = 0.99
RainbowAgent.game = 'Centipede'
RainbowAgent.runtype = 'iqn_fqf-ws-directbp-rmsprop-f0.00001-e0.0001-s0'
RainbowAgent.update_horizon = 1
RainbowAgent.min_replay_history = 50000  # agent steps
RainbowAgent.update_period = 4
RainbowAgent.target_update_period = 10000  # agent steps
RainbowAgent.epsilon_train = 0.01
RainbowAgent.epsilon_eval = 0.001
RainbowAgent.epsilon_decay_period = 1000000  # agent steps
RainbowAgent.replay_scheme = 'uniform'
RainbowAgent.tf_device = '/gpu:0'  # use '/cpu:*' for non-GPU version
RainbowAgent.optimizer = @tf.train.AdamOptimizer()

tf.train.AdamOptimizer.learning_rate = 0.00005
tf.train.AdamOptimizer.epsilon = 0.0003125

atari_lib.create_atari_environment.game_name = 'Centipede'
atari_lib.create_atari_environment.sticky_actions = False
create_agent.agent_name = 'implicit_quantile'
Runner.num_iterations = 200
Runner.game = 'Centipede'
Runner.runtype = 'iqn_fqf-ws-directbp-rmsprop-f0.00001-e0.0001-s0'
Runner.training_steps = 250000
Runner.evaluation_steps = 125000
Runner.max_steps_per_episode = 27000

AtariPreprocessing.terminal_on_life_loss = True

WrappedPrioritizedReplayBuffer.replay_capacity = 1000000
WrappedPrioritizedReplayBuffer.batch_size = 32
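The four directbp configs above differ only in fqf_factor and fqf_ent. Any of them can be launched through Dopamine's standard entry points; a minimal sketch, assuming stock Dopamine behavior and hypothetical paths:

# Sketch only: load_gin_configs and create_runner are stock Dopamine APIs;
# the gin file path and base_dir below are hypothetical.
from dopamine.discrete_domains import run_experiment

run_experiment.load_gin_configs(['configs/centipede_fqf_directbp.gin'], [])
runner = run_experiment.create_runner('/tmp/fqf_centipede')
runner.run_experiment()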
@@ -0,0 +1,46 @@
# Hyperparameters follow Dabney et al. (2018).
import dopamine.agents.fqf.fqf_agent
import dopamine.agents.rainbow.rainbow_agent
import dopamine.discrete_domains.atari_lib
import dopamine.discrete_domains.run_experiment
import dopamine.replay_memory.prioritized_replay_buffer
import gin.tf.external_configurables

FQFAgent.kappa = 1.0
FQFAgent.num_tau_samples = 32
FQFAgent.num_tau_prime_samples = 32
FQFAgent.num_quantile_samples = 32
FQFAgent.runtype = 'iqn_fqf-ws-sqloss-rmsprop-f0.000001-e0.00001-s0'
FQFAgent.fqf_factor = '0.000001'
FQFAgent.fqf_ent = '0.00001'
RainbowAgent.gamma = 0.99
RainbowAgent.game = 'Centipede'
RainbowAgent.runtype = 'iqn_fqf-ws-sqloss-rmsprop-f0.000001-e0.00001-s0'
RainbowAgent.update_horizon = 1
RainbowAgent.min_replay_history = 50000  # agent steps
RainbowAgent.update_period = 4
RainbowAgent.target_update_period = 10000  # agent steps
RainbowAgent.epsilon_train = 0.01
RainbowAgent.epsilon_eval = 0.001
RainbowAgent.epsilon_decay_period = 1000000  # agent steps
RainbowAgent.replay_scheme = 'uniform'
RainbowAgent.tf_device = '/gpu:0'  # use '/cpu:*' for non-GPU version
RainbowAgent.optimizer = @tf.train.AdamOptimizer()

tf.train.AdamOptimizer.learning_rate = 0.00005
tf.train.AdamOptimizer.epsilon = 0.0003125

atari_lib.create_atari_environment.game_name = 'Centipede'
atari_lib.create_atari_environment.sticky_actions = False
create_agent.agent_name = 'implicit_quantile'
Runner.num_iterations = 200
Runner.game = 'Centipede'
Runner.runtype = 'iqn_fqf-ws-sqloss-rmsprop-f0.000001-e0.00001-s0'
Runner.training_steps = 250000
Runner.evaluation_steps = 125000
Runner.max_steps_per_episode = 27000

AtariPreprocessing.terminal_on_life_loss = True

WrappedPrioritizedReplayBuffer.replay_capacity = 1000000
WrappedPrioritizedReplayBuffer.batch_size = 32
@@ -0,0 +1,46 @@
# Hyperparameters follow Dabney et al. (2018).
import dopamine.agents.fqf.fqf_agent
import dopamine.agents.rainbow.rainbow_agent
import dopamine.discrete_domains.atari_lib
import dopamine.discrete_domains.run_experiment
import dopamine.replay_memory.prioritized_replay_buffer
import gin.tf.external_configurables

FQFAgent.kappa = 1.0
FQFAgent.num_tau_samples = 32
FQFAgent.num_tau_prime_samples = 32
FQFAgent.num_quantile_samples = 32
FQFAgent.runtype = 'iqn_fqf-ws-sqloss-rmsprop-f0.000001-e0.0001-s0'
FQFAgent.fqf_factor = '0.000001'
FQFAgent.fqf_ent = '0.0001'
RainbowAgent.gamma = 0.99
RainbowAgent.game = 'Centipede'
RainbowAgent.runtype = 'iqn_fqf-ws-sqloss-rmsprop-f0.000001-e0.0001-s0'
RainbowAgent.update_horizon = 1
RainbowAgent.min_replay_history = 50000  # agent steps
RainbowAgent.update_period = 4
RainbowAgent.target_update_period = 10000  # agent steps
RainbowAgent.epsilon_train = 0.01
RainbowAgent.epsilon_eval = 0.001
RainbowAgent.epsilon_decay_period = 1000000  # agent steps
RainbowAgent.replay_scheme = 'uniform'
RainbowAgent.tf_device = '/gpu:0'  # use '/cpu:*' for non-GPU version
RainbowAgent.optimizer = @tf.train.AdamOptimizer()

tf.train.AdamOptimizer.learning_rate = 0.00005
tf.train.AdamOptimizer.epsilon = 0.0003125

atari_lib.create_atari_environment.game_name = 'Centipede'
atari_lib.create_atari_environment.sticky_actions = False
create_agent.agent_name = 'implicit_quantile'
Runner.num_iterations = 200
Runner.game = 'Centipede'
Runner.runtype = 'iqn_fqf-ws-sqloss-rmsprop-f0.000001-e0.0001-s0'
Runner.training_steps = 250000
Runner.evaluation_steps = 125000
Runner.max_steps_per_episode = 27000

AtariPreprocessing.terminal_on_life_loss = True

WrappedPrioritizedReplayBuffer.replay_capacity = 1000000
WrappedPrioritizedReplayBuffer.batch_size = 32
@@ -0,0 +1,46 @@
# Hyperparameters follow Dabney et al. (2018).
import dopamine.agents.fqf.fqf_agent
import dopamine.agents.rainbow.rainbow_agent
import dopamine.discrete_domains.atari_lib
import dopamine.discrete_domains.run_experiment
import dopamine.replay_memory.prioritized_replay_buffer
import gin.tf.external_configurables

FQFAgent.kappa = 1.0
FQFAgent.num_tau_samples = 32
FQFAgent.num_tau_prime_samples = 32
FQFAgent.num_quantile_samples = 32
FQFAgent.runtype = 'iqn_fqf-ws-sqloss-rmsprop-f0.00001-e0.00001-s0'
FQFAgent.fqf_factor = '0.00001'
FQFAgent.fqf_ent = '0.00001'
RainbowAgent.gamma = 0.99
RainbowAgent.game = 'Centipede'
RainbowAgent.runtype = 'iqn_fqf-ws-sqloss-rmsprop-f0.00001-e0.00001-s0'
RainbowAgent.update_horizon = 1
RainbowAgent.min_replay_history = 50000  # agent steps
RainbowAgent.update_period = 4
RainbowAgent.target_update_period = 10000  # agent steps
RainbowAgent.epsilon_train = 0.01
RainbowAgent.epsilon_eval = 0.001
RainbowAgent.epsilon_decay_period = 1000000  # agent steps
RainbowAgent.replay_scheme = 'uniform'
RainbowAgent.tf_device = '/gpu:0'  # use '/cpu:*' for non-GPU version
RainbowAgent.optimizer = @tf.train.AdamOptimizer()

tf.train.AdamOptimizer.learning_rate = 0.00005
tf.train.AdamOptimizer.epsilon = 0.0003125

atari_lib.create_atari_environment.game_name = 'Centipede'
atari_lib.create_atari_environment.sticky_actions = False
create_agent.agent_name = 'implicit_quantile'
Runner.num_iterations = 200
Runner.game = 'Centipede'
Runner.runtype = 'iqn_fqf-ws-sqloss-rmsprop-f0.00001-e0.00001-s0'
Runner.training_steps = 250000
Runner.evaluation_steps = 125000
Runner.max_steps_per_episode = 27000

AtariPreprocessing.terminal_on_life_loss = True

WrappedPrioritizedReplayBuffer.replay_capacity = 1000000
WrappedPrioritizedReplayBuffer.batch_size = 32
@@ -0,0 +1,46 @@
# Hyperparameters follow Dabney et al. (2018).
import dopamine.agents.fqf.fqf_agent
import dopamine.agents.rainbow.rainbow_agent
import dopamine.discrete_domains.atari_lib
import dopamine.discrete_domains.run_experiment
import dopamine.replay_memory.prioritized_replay_buffer
import gin.tf.external_configurables

FQFAgent.kappa = 1.0
FQFAgent.num_tau_samples = 32
FQFAgent.num_tau_prime_samples = 32
FQFAgent.num_quantile_samples = 32
FQFAgent.runtype = 'iqn_fqf-ws-sqloss-rmsprop-f0.00001-e0.0001-s0'
FQFAgent.fqf_factor = '0.00001'
FQFAgent.fqf_ent = '0.0001'
RainbowAgent.gamma = 0.99
RainbowAgent.game = 'Centipede'
RainbowAgent.runtype = 'iqn_fqf-ws-sqloss-rmsprop-f0.00001-e0.0001-s0'
RainbowAgent.update_horizon = 1
RainbowAgent.min_replay_history = 50000  # agent steps
RainbowAgent.update_period = 4
RainbowAgent.target_update_period = 10000  # agent steps
RainbowAgent.epsilon_train = 0.01
RainbowAgent.epsilon_eval = 0.001
RainbowAgent.epsilon_decay_period = 1000000  # agent steps
RainbowAgent.replay_scheme = 'uniform'
RainbowAgent.tf_device = '/gpu:0'  # use '/cpu:*' for non-GPU version
RainbowAgent.optimizer = @tf.train.AdamOptimizer()

tf.train.AdamOptimizer.learning_rate = 0.00005
tf.train.AdamOptimizer.epsilon = 0.0003125

atari_lib.create_atari_environment.game_name = 'Centipede'
atari_lib.create_atari_environment.sticky_actions = False
create_agent.agent_name = 'implicit_quantile'
Runner.num_iterations = 200
Runner.game = 'Centipede'
Runner.runtype = 'iqn_fqf-ws-sqloss-rmsprop-f0.00001-e0.0001-s0'
Runner.training_steps = 250000
Runner.evaluation_steps = 125000
Runner.max_steps_per_episode = 27000

AtariPreprocessing.terminal_on_life_loss = True

WrappedPrioritizedReplayBuffer.replay_capacity = 1000000
WrappedPrioritizedReplayBuffer.batch_size = 32
Binary file not shown.
Binary file not shown.
@@ -12,7 +12,7 @@ declare -a seeds=(0)
 declare -a factors=(0.00001 0.000001)
 declare -a ents=(0.0001 0.00001)
 declare -a optimizers=('rmsprop')
-declare -a losses=('directbp' 'sqloss')
+declare -a losses=('sqloss')

 for game in "${games[@]}"
 do
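These arrays span the same 2x2x2 grid as the eight gin files above; a short sketch that reproduces the runtype strings for the full sweep:

# Sketch only: enumerate the sweep encoded by the shell arrays and rebuild
# the runtype naming used in the gin files ('s0' is the single seed).
from itertools import product

losses = ['directbp', 'sqloss']
factors = ['0.000001', '0.00001']
ents = ['0.00001', '0.0001']

for loss, factor, ent in product(losses, factors, ents):
  print('iqn_fqf-ws-%s-rmsprop-f%s-e%s-s0' % (loss, factor, ent))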
@@ -470,6 +470,9 @@ class Runner(object):
     self._save_tensorboard_summaries(iteration, num_episodes_train,
                                      average_reward_train, num_episodes_eval,
                                      average_reward_eval)
+    if not self.testing:
+      self.fout_test.write('%d %f %d\n' % (iteration, average_reward_eval, num_episodes_eval))
+      self.fout_test.flush()
     return statistics.data_lists

   def _save_tensorboard_summaries(self, iteration,
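The added lines append one "iteration average_reward_eval num_episodes_eval" row per iteration to self.fout_test. A small reader for that format (not part of the commit; where fout_test points is not shown in this hunk):

# Sketch only: parse the three-column evaluation log written by the lines
# added above. The file path is an assumption.
def read_eval_log(path):
  rows = []
  with open(path) as f:
    for line in f:
      iteration, avg_reward, num_episodes = line.split()
      rows.append((int(iteration), float(avg_reward), int(num_episodes)))
  return rows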
Binary file not shown.