add option for directbp and sqloss

Li Zhao 2020-07-31 12:50:15 +08:00
Parent a8403a7462
Commit 4d9b20e6ac
15 changed files with 372 additions and 1 deletion

Binary data
.DS_Store vendored

Binary file not shown.

Binary data
dopamine/.DS_Store vendored

Binary file not shown.

View file

@@ -0,0 +1,46 @@
# Hyperparameters follow Dabney et al. (2018).
import dopamine.agents.fqf.fqf_agent
import dopamine.agents.rainbow.rainbow_agent
import dopamine.discrete_domains.atari_lib
import dopamine.discrete_domains.run_experiment
import dopamine.replay_memory.prioritized_replay_buffer
import gin.tf.external_configurables
FQFAgent.kappa = 1.0
FQFAgent.num_tau_samples = 32
FQFAgent.num_tau_prime_samples = 32
FQFAgent.num_quantile_samples = 32
FQFAgent.runtype = 'iqn_fqf-ws-directbp-rmsprop-f0.000001-e0.00001-s0'
FQFAgent.fqf_factor = '0.000001'
FQFAgent.fqf_ent = '0.00001'
RainbowAgent.gamma = 0.99
RainbowAgent.game = 'Centipede'
RainbowAgent.runtype = 'iqn_fqf-ws-directbp-rmsprop-f0.000001-e0.00001-s0'
RainbowAgent.update_horizon = 1
RainbowAgent.min_replay_history = 50000 # agent steps
RainbowAgent.update_period = 4
RainbowAgent.target_update_period = 10000 # agent steps
RainbowAgent.epsilon_train = 0.01
RainbowAgent.epsilon_eval = 0.001
RainbowAgent.epsilon_decay_period = 1000000 # agent steps
RainbowAgent.replay_scheme = 'uniform'
RainbowAgent.tf_device = '/gpu:0'  # use '/cpu:*' for non-GPU version
RainbowAgent.optimizer = @tf.train.AdamOptimizer()
tf.train.AdamOptimizer.learning_rate = 0.00005
tf.train.AdamOptimizer.epsilon = 0.0003125
atari_lib.create_atari_environment.game_name = 'Centipede'
atari_lib.create_atari_environment.sticky_actions = False
create_agent.agent_name = 'implicit_quantile'
Runner.num_iterations = 200
Runner.game = 'Centipede'
Runner.runtype = 'iqn_fqf-ws-directbp-rmsprop-f0.000001-e0.00001-s0'
Runner.training_steps = 250000
Runner.evaluation_steps = 125000
Runner.max_steps_per_episode = 27000
AtariPreprocessing.terminal_on_life_loss = True
WrappedPrioritizedReplayBuffer.replay_capacity = 1000000
WrappedPrioritizedReplayBuffer.batch_size = 32
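
For context (not part of this commit): configs like the one above are consumed through gin. Below is a minimal launch sketch, assuming this fork keeps Dopamine's standard `create_runner()`/`run_experiment()` API; the gin file path is a hypothetical name, since the diff view does not show the actual file names.

```python
# Minimal launch sketch. Assumptions: the config above is saved at the
# hypothetical path below, and the fork preserves Dopamine's standard
# run_experiment API (Runner.game/Runner.runtype are bound in the gin file).
from absl import app
from dopamine.discrete_domains import run_experiment
import gin.tf  # makes TF configurables like tf.train.AdamOptimizer bindable

GIN_FILE = 'dopamine/agents/fqf/configs/fqf_directbp.gin'  # hypothetical name

def main(_):
  gin.parse_config_file(GIN_FILE)
  runner = run_experiment.create_runner('/tmp/fqf_directbp')
  runner.run_experiment()

if __name__ == '__main__':
  app.run(main)
```

Equivalently, the stock entry point should work: `python -um dopamine.discrete_domains.train --base_dir=/tmp/fqf_directbp --gin_files=$GIN_FILE`.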

View file

@@ -0,0 +1,46 @@
# Hyperparameters follow Dabney et al. (2018).
import dopamine.agents.fqf.fqf_agent
import dopamine.agents.rainbow.rainbow_agent
import dopamine.discrete_domains.atari_lib
import dopamine.discrete_domains.run_experiment
import dopamine.replay_memory.prioritized_replay_buffer
import gin.tf.external_configurables
FQFAgent.kappa = 1.0
FQFAgent.num_tau_samples = 32
FQFAgent.num_tau_prime_samples = 32
FQFAgent.num_quantile_samples = 32
FQFAgent.runtype = 'iqn_fqf-ws-directbp-rmsprop-f0.000001-e0.0001-s0'
FQFAgent.fqf_factor = '0.000001'
FQFAgent.fqf_ent = '0.0001'
RainbowAgent.gamma = 0.99
RainbowAgent.game = 'Centipede'
RainbowAgent.runtype = 'iqn_fqf-ws-directbp-rmsprop-f0.000001-e0.0001-s0'
RainbowAgent.update_horizon = 1
RainbowAgent.min_replay_history = 50000 # agent steps
RainbowAgent.update_period = 4
RainbowAgent.target_update_period = 10000 # agent steps
RainbowAgent.epsilon_train = 0.01
RainbowAgent.epsilon_eval = 0.001
RainbowAgent.epsilon_decay_period = 1000000 # agent steps
RainbowAgent.replay_scheme = 'uniform'
RainbowAgent.tf_device = '/gpu:0'  # use '/cpu:*' for non-GPU version
RainbowAgent.optimizer = @tf.train.AdamOptimizer()
tf.train.AdamOptimizer.learning_rate = 0.00005
tf.train.AdamOptimizer.epsilon = 0.0003125
atari_lib.create_atari_environment.game_name = 'Centipede'
atari_lib.create_atari_environment.sticky_actions = False
create_agent.agent_name = 'implicit_quantile'
Runner.num_iterations = 200
Runner.game = 'Centipede'
Runner.runtype = 'iqn_fqf-ws-directbp-rmsprop-f0.000001-e0.0001-s0'
Runner.training_steps = 250000
Runner.evaluation_steps = 125000
Runner.max_steps_per_episode = 27000
AtariPreprocessing.terminal_on_life_loss = True
WrappedPrioritizedReplayBuffer.replay_capacity = 1000000
WrappedPrioritizedReplayBuffer.batch_size = 32

View file

@@ -0,0 +1,46 @@
# Hyperparameters follow Dabney et al. (2018).
import dopamine.agents.fqf.fqf_agent
import dopamine.agents.rainbow.rainbow_agent
import dopamine.discrete_domains.atari_lib
import dopamine.discrete_domains.run_experiment
import dopamine.replay_memory.prioritized_replay_buffer
import gin.tf.external_configurables
FQFAgent.kappa = 1.0
FQFAgent.num_tau_samples = 32
FQFAgent.num_tau_prime_samples = 32
FQFAgent.num_quantile_samples = 32
FQFAgent.runtype = 'iqn_fqf-ws-directbp-rmsprop-f0.00001-e0.00001-s0'
FQFAgent.fqf_factor = '0.00001'
FQFAgent.fqf_ent = '0.00001'
RainbowAgent.gamma = 0.99
RainbowAgent.game = 'Centipede'
RainbowAgent.runtype = 'iqn_fqf-ws-directbp-rmsprop-f0.00001-e0.00001-s0'
RainbowAgent.update_horizon = 1
RainbowAgent.min_replay_history = 50000 # agent steps
RainbowAgent.update_period = 4
RainbowAgent.target_update_period = 10000 # agent steps
RainbowAgent.epsilon_train = 0.01
RainbowAgent.epsilon_eval = 0.001
RainbowAgent.epsilon_decay_period = 1000000 # agent steps
RainbowAgent.replay_scheme = 'uniform'
RainbowAgent.tf_device = '/gpu:0'  # use '/cpu:*' for non-GPU version
RainbowAgent.optimizer = @tf.train.AdamOptimizer()
tf.train.AdamOptimizer.learning_rate = 0.00005
tf.train.AdamOptimizer.epsilon = 0.0003125
atari_lib.create_atari_environment.game_name = 'Centipede'
atari_lib.create_atari_environment.sticky_actions = False
create_agent.agent_name = 'implicit_quantile'
Runner.num_iterations = 200
Runner.game = 'Centipede'
Runner.runtype = 'iqn_fqf-ws-directbp-rmsprop-f0.00001-e0.00001-s0'
Runner.training_steps = 250000
Runner.evaluation_steps = 125000
Runner.max_steps_per_episode = 27000
AtariPreprocessing.terminal_on_life_loss = True
WrappedPrioritizedReplayBuffer.replay_capacity = 1000000
WrappedPrioritizedReplayBuffer.batch_size = 32

View file

@@ -0,0 +1,46 @@
# Hyperparameters follow Dabney et al. (2018).
import dopamine.agents.fqf.fqf_agent
import dopamine.agents.rainbow.rainbow_agent
import dopamine.discrete_domains.atari_lib
import dopamine.discrete_domains.run_experiment
import dopamine.replay_memory.prioritized_replay_buffer
import gin.tf.external_configurables
FQFAgent.kappa = 1.0
FQFAgent.num_tau_samples = 32
FQFAgent.num_tau_prime_samples = 32
FQFAgent.num_quantile_samples = 32
FQFAgent.runtype = 'iqn_fqf-ws-directbp-rmsprop-f0.00001-e0.0001-s0'
FQFAgent.fqf_factor = '0.00001'
FQFAgent.fqf_ent = '0.0001'
RainbowAgent.gamma = 0.99
RainbowAgent.game = 'Centipede'
RainbowAgent.runtype = 'iqn_fqf-ws-directbp-rmsprop-f0.00001-e0.0001-s0'
RainbowAgent.update_horizon = 1
RainbowAgent.min_replay_history = 50000 # agent steps
RainbowAgent.update_period = 4
RainbowAgent.target_update_period = 10000 # agent steps
RainbowAgent.epsilon_train = 0.01
RainbowAgent.epsilon_eval = 0.001
RainbowAgent.epsilon_decay_period = 1000000 # agent steps
RainbowAgent.replay_scheme = 'uniform'
RainbowAgent.tf_device = '/gpu:0'  # use '/cpu:*' for non-GPU version
RainbowAgent.optimizer = @tf.train.AdamOptimizer()
tf.train.AdamOptimizer.learning_rate = 0.00005
tf.train.AdamOptimizer.epsilon = 0.0003125
atari_lib.create_atari_environment.game_name = 'Centipede'
atari_lib.create_atari_environment.sticky_actions = False
create_agent.agent_name = 'implicit_quantile'
Runner.num_iterations = 200
Runner.game = 'Centipede'
Runner.runtype = 'iqn_fqf-ws-directbp-rmsprop-f0.00001-e0.0001-s0'
Runner.training_steps = 250000
Runner.evaluation_steps = 125000
Runner.max_steps_per_episode = 27000
AtariPreprocessing.terminal_on_life_loss = True
WrappedPrioritizedReplayBuffer.replay_capacity = 1000000
WrappedPrioritizedReplayBuffer.batch_size = 32

View file

@@ -0,0 +1,46 @@
# Hyperparameters follow Dabney et al. (2018).
import dopamine.agents.fqf.fqf_agent
import dopamine.agents.rainbow.rainbow_agent
import dopamine.discrete_domains.atari_lib
import dopamine.discrete_domains.run_experiment
import dopamine.replay_memory.prioritized_replay_buffer
import gin.tf.external_configurables
FQFAgent.kappa = 1.0
FQFAgent.num_tau_samples = 32
FQFAgent.num_tau_prime_samples = 32
FQFAgent.num_quantile_samples = 32
FQFAgent.runtype = 'iqn_fqf-ws-sqloss-rmsprop-f0.000001-e0.00001-s0'
FQFAgent.fqf_factor = '0.000001'
FQFAgent.fqf_ent = '0.00001'
RainbowAgent.gamma = 0.99
RainbowAgent.game = 'Centipede'
RainbowAgent.runtype = 'iqn_fqf-ws-sqloss-rmsprop-f0.000001-e0.00001-s0'
RainbowAgent.update_horizon = 1
RainbowAgent.min_replay_history = 50000 # agent steps
RainbowAgent.update_period = 4
RainbowAgent.target_update_period = 10000 # agent steps
RainbowAgent.epsilon_train = 0.01
RainbowAgent.epsilon_eval = 0.001
RainbowAgent.epsilon_decay_period = 1000000 # agent steps
RainbowAgent.replay_scheme = 'uniform'
RainbowAgent.tf_device = '/gpu:0'  # use '/cpu:*' for non-GPU version
RainbowAgent.optimizer = @tf.train.AdamOptimizer()
tf.train.AdamOptimizer.learning_rate = 0.00005
tf.train.AdamOptimizer.epsilon = 0.0003125
atari_lib.create_atari_environment.game_name = 'Centipede'
atari_lib.create_atari_environment.sticky_actions = False
create_agent.agent_name = 'implicit_quantile'
Runner.num_iterations = 200
Runner.game = 'Centipede'
Runner.runtype = 'iqn_fqf-ws-sqloss-rmsprop-f0.000001-e0.00001-s0'
Runner.training_steps = 250000
Runner.evaluation_steps = 125000
Runner.max_steps_per_episode = 27000
AtariPreprocessing.terminal_on_life_loss = True
WrappedPrioritizedReplayBuffer.replay_capacity = 1000000
WrappedPrioritizedReplayBuffer.batch_size = 32

View file

@@ -0,0 +1,46 @@
# Hyperparameters follow Dabney et al. (2018).
import dopamine.agents.fqf.fqf_agent
import dopamine.agents.rainbow.rainbow_agent
import dopamine.discrete_domains.atari_lib
import dopamine.discrete_domains.run_experiment
import dopamine.replay_memory.prioritized_replay_buffer
import gin.tf.external_configurables
FQFAgent.kappa = 1.0
FQFAgent.num_tau_samples = 32
FQFAgent.num_tau_prime_samples = 32
FQFAgent.num_quantile_samples = 32
FQFAgent.runtype = 'iqn_fqf-ws-sqloss-rmsprop-f0.000001-e0.0001-s0'
FQFAgent.fqf_factor = '0.000001'
FQFAgent.fqf_ent = '0.0001'
RainbowAgent.gamma = 0.99
RainbowAgent.game = 'Centipede'
RainbowAgent.runtype = 'iqn_fqf-ws-sqloss-rmsprop-f0.000001-e0.0001-s0'
RainbowAgent.update_horizon = 1
RainbowAgent.min_replay_history = 50000 # agent steps
RainbowAgent.update_period = 4
RainbowAgent.target_update_period = 10000 # agent steps
RainbowAgent.epsilon_train = 0.01
RainbowAgent.epsilon_eval = 0.001
RainbowAgent.epsilon_decay_period = 1000000 # agent steps
RainbowAgent.replay_scheme = 'uniform'
RainbowAgent.tf_device = '/gpu:0'  # use '/cpu:*' for non-GPU version
RainbowAgent.optimizer = @tf.train.AdamOptimizer()
tf.train.AdamOptimizer.learning_rate = 0.00005
tf.train.AdamOptimizer.epsilon = 0.0003125
atari_lib.create_atari_environment.game_name = 'Centipede'
atari_lib.create_atari_environment.sticky_actions = False
create_agent.agent_name = 'implicit_quantile'
Runner.num_iterations = 200
Runner.game = 'Centipede'
Runner.runtype = 'iqn_fqf-ws-sqloss-rmsprop-f0.000001-e0.0001-s0'
Runner.training_steps = 250000
Runner.evaluation_steps = 125000
Runner.max_steps_per_episode = 27000
AtariPreprocessing.terminal_on_life_loss = True
WrappedPrioritizedReplayBuffer.replay_capacity = 1000000
WrappedPrioritizedReplayBuffer.batch_size = 32

View file

@@ -0,0 +1,46 @@
# Hyperparameters follow Dabney et al. (2018).
import dopamine.agents.fqf.fqf_agent
import dopamine.agents.rainbow.rainbow_agent
import dopamine.discrete_domains.atari_lib
import dopamine.discrete_domains.run_experiment
import dopamine.replay_memory.prioritized_replay_buffer
import gin.tf.external_configurables
FQFAgent.kappa = 1.0
FQFAgent.num_tau_samples = 32
FQFAgent.num_tau_prime_samples = 32
FQFAgent.num_quantile_samples = 32
FQFAgent.runtype = 'iqn_fqf-ws-sqloss-rmsprop-f0.00001-e0.00001-s0'
FQFAgent.fqf_factor = '0.00001'
FQFAgent.fqf_ent = '0.00001'
RainbowAgent.gamma = 0.99
RainbowAgent.game = 'Centipede'
RainbowAgent.runtype = 'iqn_fqf-ws-sqloss-rmsprop-f0.00001-e0.00001-s0'
RainbowAgent.update_horizon = 1
RainbowAgent.min_replay_history = 50000 # agent steps
RainbowAgent.update_period = 4
RainbowAgent.target_update_period = 10000 # agent steps
RainbowAgent.epsilon_train = 0.01
RainbowAgent.epsilon_eval = 0.001
RainbowAgent.epsilon_decay_period = 1000000 # agent steps
RainbowAgent.replay_scheme = 'uniform'
RainbowAgent.tf_device = '/gpu:0'  # use '/cpu:*' for non-GPU version
RainbowAgent.optimizer = @tf.train.AdamOptimizer()
tf.train.AdamOptimizer.learning_rate = 0.00005
tf.train.AdamOptimizer.epsilon = 0.0003125
atari_lib.create_atari_environment.game_name = 'Centipede'
atari_lib.create_atari_environment.sticky_actions = False
create_agent.agent_name = 'implicit_quantile'
Runner.num_iterations = 200
Runner.game = 'Centipede'
Runner.runtype = 'iqn_fqf-ws-sqloss-rmsprop-f0.00001-e0.00001-s0'
Runner.training_steps = 250000
Runner.evaluation_steps = 125000
Runner.max_steps_per_episode = 27000
AtariPreprocessing.terminal_on_life_loss = True
WrappedPrioritizedReplayBuffer.replay_capacity = 1000000
WrappedPrioritizedReplayBuffer.batch_size = 32

View file

@@ -0,0 +1,46 @@
# Hyperparameters follow Dabney et al. (2018).
import dopamine.agents.fqf.fqf_agent
import dopamine.agents.rainbow.rainbow_agent
import dopamine.discrete_domains.atari_lib
import dopamine.discrete_domains.run_experiment
import dopamine.replay_memory.prioritized_replay_buffer
import gin.tf.external_configurables
FQFAgent.kappa = 1.0
FQFAgent.num_tau_samples = 32
FQFAgent.num_tau_prime_samples = 32
FQFAgent.num_quantile_samples = 32
FQFAgent.runtype = 'iqn_fqf-ws-sqloss-rmsprop-f0.00001-e0.0001-s0'
FQFAgent.fqf_factor = '0.00001'
FQFAgent.fqf_ent = '0.0001'
RainbowAgent.gamma = 0.99
RainbowAgent.game = 'Centipede'
RainbowAgent.runtype = 'iqn_fqf-ws-sqloss-rmsprop-f0.00001-e0.0001-s0'
RainbowAgent.update_horizon = 1
RainbowAgent.min_replay_history = 50000 # agent steps
RainbowAgent.update_period = 4
RainbowAgent.target_update_period = 10000 # agent steps
RainbowAgent.epsilon_train = 0.01
RainbowAgent.epsilon_eval = 0.001
RainbowAgent.epsilon_decay_period = 1000000 # agent steps
RainbowAgent.replay_scheme = 'uniform'
RainbowAgent.tf_device = '/gpu:0'  # use '/cpu:*' for non-GPU version
RainbowAgent.optimizer = @tf.train.AdamOptimizer()
tf.train.AdamOptimizer.learning_rate = 0.00005
tf.train.AdamOptimizer.epsilon = 0.0003125
atari_lib.create_atari_environment.game_name = 'Centipede'
atari_lib.create_atari_environment.sticky_actions = False
create_agent.agent_name = 'implicit_quantile'
Runner.num_iterations = 200
Runner.game = 'Centipede'
Runner.runtype = 'iqn_fqf-ws-sqloss-rmsprop-f0.00001-e0.0001-s0'
Runner.training_steps = 250000
Runner.evaluation_steps = 125000
Runner.max_steps_per_episode = 27000
AtariPreprocessing.terminal_on_life_loss = True
WrappedPrioritizedReplayBuffer.replay_capacity = 1000000
WrappedPrioritizedReplayBuffer.batch_size = 32

Binary data
dopamine/discrete_domains/.DS_Store vendored

Binary file not shown.

Binary data
dopamine/discrete_domains/.run_experiment.py.swp Normal file

Binary file not shown.

View file

@@ -12,7 +12,7 @@ declare -a seeds=(0)
declare -a factors=(0.00001 0.000001)
declare -a ents=(0.0001 0.00001)
declare -a optimizers=('rmsprop')
-declare -a losses=('directbp' 'sqloss')
+declare -a losses=('sqloss')
for game in "${games[@]}"
do
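
After this change the sweep loop only iterates over 'sqloss' (the 'directbp' configs added above were presumably generated before this edit). As a sketch, assuming the runtype template is correctly inferred from the config files in this commit, the original grid expands to exactly the eight runtypes seen here:

```python
# Sketch of how the runtype strings in the configs above appear to be
# composed from the sweep arrays in this shell script; the exact template
# 'iqn_fqf-ws-{loss}-{opt}-f{factor}-e{ent}-s{seed}' is an inference.
from itertools import product

seeds = [0]
factors = ['0.00001', '0.000001']   # kept as strings, matching fqf_factor
ents = ['0.0001', '0.00001']        # kept as strings, matching fqf_ent
optimizers = ['rmsprop']
losses = ['directbp', 'sqloss']

for loss, opt, factor, ent, seed in product(losses, optimizers,
                                            factors, ents, seeds):
    runtype = f'iqn_fqf-ws-{loss}-{opt}-f{factor}-e{ent}-s{seed}'
    print(runtype)  # e.g. iqn_fqf-ws-directbp-rmsprop-f0.00001-e0.0001-s0
```

This yields 2 losses x 2 factors x 2 ents = 8 combinations, one per new gin file.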

View file

@@ -470,6 +470,9 @@ class Runner(object):
    self._save_tensorboard_summaries(iteration, num_episodes_train,
                                     average_reward_train, num_episodes_eval,
                                     average_reward_eval)
+   if not self.testing:
+     self.fout_test.write('%d %f %d\n' % (iteration, average_reward_eval, num_episodes_eval))
+     self.fout_test.flush()
    return statistics.data_lists

  def _save_tensorboard_summaries(self, iteration,
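
The added lines assume `self.testing` and `self.fout_test` already exist on `Runner`; their definitions are not part of this hunk. A hypothetical sketch of how they might be initialized, with the file name and flag purely illustrative:

```python
# Fragment only: hypothetical setup for the attributes used by the added
# lines above. The fork's actual Runner.__init__ is not shown in this diff.
import os

class Runner(object):

  def __init__(self, base_dir, game='Centipede', runtype='iqn_fqf',
               testing=False):
    self.testing = testing
    if not self.testing:
      # One line per iteration: iteration index, average eval return, and
      # number of eval episodes (matching the '%d %f %d' format above).
      log_path = os.path.join(base_dir, '%s_%s_eval.txt' % (game, runtype))
      self.fout_test = open(log_path, 'a')
```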

Binary data
dopamine/replay_memory/.DS_Store vendored Normal file

Binary file not shown.