Usability fixes. Move epsilon reduction to trainer in distributed RL training.
This commit is contained in:
Родитель
d6ce104e76
Коммит
4d0ff77b24
Различия файлов скрыты, потому что одна или несколько строк слишком длинны
|
@ -0,0 +1,16 @@
|
|||
import os

# Run this script from within an anaconda virtual environment to install the required packages
# Be sure to run this script as root or as administrator.

# Shell commands, executed one after another in the order listed.
# Optional step that is intentionally left disabled: 'conda update -n base conda'
INSTALL_COMMANDS = (
    'python -m pip install --upgrade pip',
    'conda install jupyter',
    'pip install matplotlib==2.1.2',
    'pip install image',
    'pip install keras_tqdm',
    'conda install -c conda-forge opencv',
    'pip install msgpack-rpc-python',
    'pip install pandas',
    'pip install numpy',
    'conda install scipy',
)

# Keep going after a failure (best effort, as before), but remember which
# commands did not succeed so the user is told about them at the end.
failed_commands = []
for command in INSTALL_COMMANDS:
    # os.system returns the status reported for the command; 0 means success.
    status = os.system(command)
    if status != 0:
        failed_commands.append((command, status))

if failed_commands:
    print('The following install commands did not complete successfully:')
    for command, status in failed_commands:
        print('  {0} (status {1})'.format(command, status))
|
|
@ -45,6 +45,16 @@ You should also be comfortable with Python. At the very least, you should be abl
|
|||
3. [Install h5py](http://docs.h5py.org/en/latest/build.html)
|
||||
4. [Install Keras](https://keras.io/#installation) and [configure the Keras backend](https://keras.io/backend/) to work with TensorFlow (default) or CNTK.
|
||||
5. [Install AzCopy](https://docs.microsoft.com/en-us/azure/storage/common/storage-use-azcopy). Be sure to add the location for the AzCopy executable to your system path.
|
||||
6. Install the other dependencies. From your anaconda environment, run "InstallPackages.py" as root or administrator. This installs the following packages into your environment:
|
||||
* jupyter
|
||||
* matplotlib v. 2.1.2
|
||||
* image
|
||||
* keras_tqdm
|
||||
* opencv
|
||||
* msgpack-rpc-python
|
||||
* pandas
|
||||
* numpy
|
||||
* scipy
|
||||
|
||||
### Simulator Package
|
||||
|
||||
|
|
|
@ -13,22 +13,45 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"collapsed": true
|
||||
},
|
||||
"outputs": [],
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Using TensorFlow backend.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Using model model/models\\model_model.02-0.0216478.h5 for testing.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from keras.models import load_model\n",
|
||||
"import sys\n",
|
||||
"import numpy as np\n",
|
||||
"import glob\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"if ('../../PythonClient/' not in sys.path):\n",
|
||||
" sys.path.insert(0, '../../PythonClient/')\n",
|
||||
"from AirSimClient import *\n",
|
||||
"\n",
|
||||
"# << Set this to the path of the model >>\n",
|
||||
"MODEL_PATH = 'model/models/sample_model.h5'"
|
||||
"# If None, then the model with the lowest validation loss from training will be used\n",
|
||||
"MODEL_PATH = None\n",
|
||||
"\n",
|
||||
"if (MODEL_PATH == None):\n",
|
||||
" models = glob.glob('model/models/*.h5') \n",
|
||||
" best_model = max(models, key=os.path.getctime)\n",
|
||||
" MODEL_PATH = best_model\n",
|
||||
" \n",
|
||||
"print('Using model {0} for testing.'.format(best_model))"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -40,16 +63,15 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Waiting for connection: \n"
|
||||
"Waiting for connection: \n",
|
||||
"Connection established!\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
@ -59,7 +81,8 @@
|
|||
"client = CarClient()\n",
|
||||
"client.confirmConnection()\n",
|
||||
"client.enableApiControl(True)\n",
|
||||
"car_controls = CarControls()"
|
||||
"car_controls = CarControls()\n",
|
||||
"print('Connection established!')"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -71,10 +94,8 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {
|
||||
"collapsed": true
|
||||
},
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"car_controls.steering = 0\n",
|
||||
|
@ -94,10 +115,8 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {
|
||||
"collapsed": true
|
||||
},
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def get_image():\n",
|
||||
|
@ -118,9 +137,7 @@
|
|||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"collapsed": true
|
||||
},
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"while (True):\n",
|
||||
|
@ -189,7 +206,7 @@
|
|||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.6.0"
|
||||
"version": "3.6.4"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
|
Различия файлов скрыты, потому что одна или несколько строк слишком длинны
|
@ -11,10 +11,8 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {
|
||||
"collapsed": true
|
||||
},
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os"
|
||||
|
@ -34,28 +32,30 @@
|
|||
"* **replay_memory_size**: The number of examples to keep in the replay memory. The replay memory is a FIFO buffer used to reduce the effects of nearby states being correlated. Minibatches are generated from randomly selecting examples from the replay memory.\n",
|
||||
"* **weights_path**: If we are doing transfer learning and using pretrained weights for the model, they will be loaded from this path.\n",
|
||||
"* **train_conv_layers**: If we are using pretrained weights, we may prefer to freeze the convolutional layers to speed up training.\n",
|
||||
"* **airsim_path**: The path to the .ps1 to start AirSim. This path cannot contain spaces.\n",
|
||||
"* **data_dir**: The path to the directory containing the road_points.txt and reward_points.txt used to compute the reward function. This path cannot contain spaces."
|
||||
"* **airsim_path**: The path to the folder containing the .ps1 to start AirSim. This path cannot contain spaces.\n",
|
||||
"* **data_dir**: The path to the directory containing the road_points.txt and reward_points.txt used to compute the reward function. This path cannot contain spaces.\n",
|
||||
"* **experiment_name**: A unique identifier for this experiment"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {
|
||||
"collapsed": true
|
||||
},
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"batch_update_frequency = 300\n",
|
||||
"#batch_update_frequency = 300\n",
|
||||
"batch_update_frequency = 10\n",
|
||||
"max_epoch_runtime_sec = 30\n",
|
||||
"per_iter_epsilon_reduction=0.003\n",
|
||||
"min_epsilon = 0.1\n",
|
||||
"batch_size = 32\n",
|
||||
"replay_memory_size = 2000\n",
|
||||
"weights_path = 'D:\\\\data\\\\pretrain_model_weights.h5'\n",
|
||||
"#replay_memory_size = 2000\n",
|
||||
"replay_memory_size = 50\n",
|
||||
"weights_path = os.path.join(os.getcwd(), 'Share\\\\data\\\\pretrain_model_weights.h5')\n",
|
||||
"train_conv_layers = 'false'\n",
|
||||
"airsim_path = 'D:\\\\AD_Cookbook_AirSim\\\\'\n",
|
||||
"data_dir = os.path.join(os.getcwd(), 'Share')"
|
||||
"airsim_path = 'E:\\\\AD_Cookbook_AirSim\\\\'\n",
|
||||
"data_dir = os.path.join(os.getcwd(), 'Share')\n",
|
||||
"experiment_name = 'local_run'"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -67,10 +67,8 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"metadata": {
|
||||
"collapsed": true
|
||||
},
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"train_cmd = 'python distributed_agent.py'\n",
|
||||
|
@ -84,6 +82,8 @@
|
|||
"train_cmd += ' train_conv_layers={0}'.format(train_conv_layers)\n",
|
||||
"train_cmd += ' airsim_path={0}'.format(airsim_path)\n",
|
||||
"train_cmd += ' data_dir={0}'.format(data_dir)\n",
|
||||
"train_cmd += ' experiment_name={0}'.format(experiment_name)\n",
|
||||
"train_cmd += ' local_run=true'\n",
|
||||
"\n",
|
||||
"with open(os.path.join(os.getcwd(), 'Share/scripts_downpour/app/train.bat'), 'w') as f:\n",
|
||||
" f.write(train_cmd)"
|
||||
|
@ -113,7 +113,7 @@
|
|||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.6.0"
|
||||
"version": "3.6.4"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
|
|
@ -126,9 +126,7 @@
|
|||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
|
@ -144,7 +142,7 @@
|
|||
"# Trainer task\n",
|
||||
"tasks.append(batchmodels.TaskAddParameter(\n",
|
||||
" id='TrainerTask',\n",
|
||||
" command_line=r'call C:\\\\prereq\\\\mount.bat && C:\\\\ProgramData\\\\Anaconda3\\\\Scripts\\\\activate.bat py36 && python -u Z:\\\\scripts_downpour\\\\manage.py runserver 0.0.0.0:80 data_dir=Z:\\\\\\\\ role=trainer experiment_name={0} batch_update_frequency={1} weights_path={2} train_conv_layers={3}'.format(job_id, batch_update_frequency, weights_path, train_conv_layers),\n",
|
||||
" command_line=r'call C:\\\\prereq\\\\mount.bat && C:\\\\ProgramData\\\\Anaconda3\\\\Scripts\\\\activate.bat py36 && python -u Z:\\\\scripts_downpour\\\\manage.py runserver 0.0.0.0:80 data_dir=Z:\\\\\\\\ role=trainer experiment_name={0} batch_update_frequency={1} weights_path={2} train_conv_layers={3} per_iter_epsilon_reduction={4} min_epsilon={5}'.format(job_id, batch_update_frequency, weights_path, train_conv_layers, per_iter_epsilon_reduction, min_epsilon),\n",
|
||||
" display_name='Trainer',\n",
|
||||
" user_identity=batchmodels.UserIdentity(user_name=NOTEBOOK_CONFIG['batch_job_user_name']),\n",
|
||||
" multi_instance_settings = batchmodels.MultiInstanceSettings(number_of_instances=1, coordination_command_line='cls')\n",
|
||||
|
@ -198,7 +196,7 @@
|
|||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.6.0"
|
||||
"version": "3.6.4"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
|
|
@ -34,13 +34,23 @@ It is also highly recommended that the reader has familiarity with the AirSim si
|
|||
|
||||
#### Environment Setup
|
||||
|
||||
2. [Install Anaconda](https://conda.io/docs/user-guide/install/index.html) with Python 3.5 or higher.
|
||||
3. [Install Tensorflow](https://www.tensorflow.org/install/install_windows)
|
||||
4. [Install h5py](http://docs.h5py.org/en/latest/build.html)
|
||||
1. [Install Anaconda](https://conda.io/docs/user-guide/install/index.html) with Python 3.5 or higher.
|
||||
2. [Install Tensorflow](https://www.tensorflow.org/install/install_windows)
|
||||
3. [Install h5py](http://docs.h5py.org/en/latest/build.html)
|
||||
4. [Install Keras](https://keras.io/#installation)
|
||||
7. [Install AzCopy](https://docs.microsoft.com/en-us/azure/storage/common/storage-use-azcopy). Be sure to add the location for the AzCopy executable to your system path.
|
||||
8. [Install the latest version of Azure PowerShell](https://docs.microsoft.com/en-us/powershell/azure/install-azurerm-ps?view=azurermps-5.3.0).
|
||||
9. [Install the latest version of the Azure CLI](https://docs.microsoft.com/en-us/cli/azure/install-azure-cli?view=azure-cli-latest).
|
||||
5. [Install AzCopy](https://docs.microsoft.com/en-us/azure/storage/common/storage-use-azcopy). Be sure to add the location for the AzCopy executable to your system path.
|
||||
6. [Install the latest version of Azure PowerShell](https://docs.microsoft.com/en-us/powershell/azure/install-azurerm-ps?view=azurermps-5.3.0).
|
||||
7. [Install the latest version of the Azure CLI](https://docs.microsoft.com/en-us/cli/azure/install-azure-cli?view=azure-cli-latest).
|
||||
8. Install the other dependencies. From your anaconda environment, run "InstallPackages.py" as root or administrator. This installs the following packages into your environment:
|
||||
* jupyter
|
||||
* matplotlib v. 2.1.2
|
||||
* image
|
||||
* keras_tqdm
|
||||
* opencv
|
||||
* msgpack-rpc-python
|
||||
* pandas
|
||||
* numpy
|
||||
* scipy
|
||||
|
||||
#### Simulator Package
|
||||
|
||||
|
|
|
@ -13,10 +13,8 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 22,
|
||||
"metadata": {
|
||||
"collapsed": true
|
||||
},
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from Share.scripts_downpour.app.airsim_client import *\n",
|
||||
|
@ -41,10 +39,9 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 17,
|
||||
"execution_count": 8,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"scrolled": true
|
||||
"scrolled": false
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
|
@ -54,27 +51,27 @@
|
|||
"_________________________________________________________________\n",
|
||||
"Layer (type) Output Shape Param # \n",
|
||||
"=================================================================\n",
|
||||
"input_4 (InputLayer) (None, 59, 255, 3) 0 \n",
|
||||
"input_3 (InputLayer) (None, 59, 255, 3) 0 \n",
|
||||
"_________________________________________________________________\n",
|
||||
"convolution0 (Conv2D) (None, 59, 255, 16) 448 \n",
|
||||
"_________________________________________________________________\n",
|
||||
"max_pooling2d_10 (MaxPooling (None, 29, 127, 16) 0 \n",
|
||||
"max_pooling2d_7 (MaxPooling2 (None, 29, 127, 16) 0 \n",
|
||||
"_________________________________________________________________\n",
|
||||
"convolution1 (Conv2D) (None, 29, 127, 32) 4640 \n",
|
||||
"_________________________________________________________________\n",
|
||||
"max_pooling2d_11 (MaxPooling (None, 14, 63, 32) 0 \n",
|
||||
"max_pooling2d_8 (MaxPooling2 (None, 14, 63, 32) 0 \n",
|
||||
"_________________________________________________________________\n",
|
||||
"convolution2 (Conv2D) (None, 14, 63, 32) 9248 \n",
|
||||
"_________________________________________________________________\n",
|
||||
"max_pooling2d_12 (MaxPooling (None, 7, 31, 32) 0 \n",
|
||||
"max_pooling2d_9 (MaxPooling2 (None, 7, 31, 32) 0 \n",
|
||||
"_________________________________________________________________\n",
|
||||
"flatten_4 (Flatten) (None, 6944) 0 \n",
|
||||
"flatten_3 (Flatten) (None, 6944) 0 \n",
|
||||
"_________________________________________________________________\n",
|
||||
"dropout_7 (Dropout) (None, 6944) 0 \n",
|
||||
"dropout_5 (Dropout) (None, 6944) 0 \n",
|
||||
"_________________________________________________________________\n",
|
||||
"rl_dense (Dense) (None, 128) 888960 \n",
|
||||
"_________________________________________________________________\n",
|
||||
"dropout_8 (Dropout) (None, 128) 0 \n",
|
||||
"dropout_6 (Dropout) (None, 128) 0 \n",
|
||||
"_________________________________________________________________\n",
|
||||
"rl_output (Dense) (None, 5) 645 \n",
|
||||
"=================================================================\n",
|
||||
|
@ -102,11 +99,19 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"collapsed": true
|
||||
},
|
||||
"outputs": [],
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Connecting to AirSim...\n",
|
||||
"Waiting for connection: \n",
|
||||
"Connected!\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print('Connecting to AirSim...')\n",
|
||||
"car_client = CarClient()\n",
|
||||
|
@ -125,10 +130,8 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 20,
|
||||
"metadata": {
|
||||
"collapsed": true
|
||||
},
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def get_image(car_client):\n",
|
||||
|
@ -149,9 +152,7 @@
|
|||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"collapsed": true
|
||||
},
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def append_to_ring_buffer(item, buffer, buffer_size):\n",
|
||||
|
@ -214,7 +215,7 @@
|
|||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.6.0"
|
||||
"version": "3.6.4"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
|
|
@ -13,6 +13,7 @@ import sys
|
|||
import requests
|
||||
import PIL
|
||||
import copy
|
||||
import datetime
|
||||
|
||||
# A class that represents the agent that will drive the vehicle, train the model, and send the gradient updates to the trainer.
|
||||
class DistributedAgent():
|
||||
|
@ -24,6 +25,8 @@ class DistributedAgent():
|
|||
|
||||
parameters['role_type'] = 'agent'
|
||||
|
||||
|
||||
print('Starting time: {0}'.format(datetime.datetime.utcnow()), file=sys.stderr)
|
||||
self.__model_buffer = None
|
||||
self.__model = None
|
||||
self.__airsim_started = False
|
||||
|
@ -280,9 +283,12 @@ class DistributedAgent():
|
|||
# This constraint is so the model doesn't end up having to churn through huge chunks of data, slowing down training
|
||||
# 4) The car has run off the road
|
||||
if (collision_info.has_collided or car_state.speed < 2 or utc_now > end_time or far_off):
|
||||
print('Start time: {0}, end time: {1}'.format(start_time, utc_now), file=sys.stderr)
|
||||
if (utc_now > end_time):
|
||||
print('timed out.')
|
||||
print('Full autonomous run finished at {0}'.format(utc_now), file=sys.stderr)
|
||||
done = True
|
||||
sys.stderr.flush()
|
||||
else:
|
||||
|
||||
# The Agent should occasionally pick random action instead of best action
|
||||
|
@ -338,6 +344,7 @@ class DistributedAgent():
|
|||
print('Num total actions: {0}'.format(len(actions)))
|
||||
|
||||
# If we are in the main loop, reduce the epsilon parameter so that the model will be called more often
|
||||
# Note: this will be overwritten by the trainer's epsilon if running in distributed mode
|
||||
if not always_random:
|
||||
self.__epsilon -= self.__per_iter_epsilon_reduction
|
||||
self.__epsilon = max(self.__epsilon, self.__min_epsilon)
|
||||
|
@ -399,12 +406,21 @@ class DistributedAgent():
|
|||
post_data['gradients'] = gradients
|
||||
post_data['batch_count'] = batches_count
|
||||
|
||||
new_model_parameters = requests.post('http://{0}:80/gradient_update'.format(self.__trainer_ip_address), json=post_data)
|
||||
print('New params:')
|
||||
print(new_model_parameters)
|
||||
response = requests.post('http://{0}:80/gradient_update'.format(self.__trainer_ip_address), json=post_data)
|
||||
print('Response:')
|
||||
print(response)
|
||||
|
||||
new_model_parameters = response.json()
|
||||
|
||||
# Update the existing model with the new parameters
|
||||
self.__model.from_packet(new_model_parameters.json())
|
||||
self.__model.from_packet(new_model_parameters)
|
||||
|
||||
# If the trainer sends us an epsilon, allow it to override our local value
|
||||
if ('epsilon' in new_model_parameters):
|
||||
new_epsilon = float(new_model_parameters['epsilon'])
|
||||
print('Overriding local epsilon with {0}, which was sent from trainer'.format(new_epsilon))
|
||||
self.__epsilon = new_epsilon
|
||||
|
||||
else:
|
||||
if (self.__num_batches_run > self.__batch_update_frequency + self.__last_checkpoint_batch_count):
|
||||
self.__model.update_critic()
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
python distributed_agent.py batch_update_frequency=10 max_epoch_runtime_sec=30 per_iter_epsilon_reduction=0.003 min_epsilon=0.1 batch_size=32 replay_memory_size=50 weights_path=F:\ADCookbookUsabilityFixes\AutonomousDrivingCookbook\DistributedRL\Share\data\pretrain_model_weights.h5 train_conv_layers=false airsim_path=E:\AD_Cookbook_AirSim\ data_dir=F:\ADCookbookUsabilityFixes\AutonomousDrivingCookbook\DistributedRL\Share experiment_name=local_run local_run=true
|
|
@ -27,6 +27,10 @@ next_batch_update_count = 0
|
|||
checkpoint_dir = ''
|
||||
agents_having_latest_critic = []
|
||||
|
||||
min_epsilon = float(os.environ['min_epsilon'])
|
||||
epsilon_step = float(os.environ['per_iter_epsilon_reduction'])
|
||||
epsilon = 1.0
|
||||
|
||||
# A simple endpoint that can be used to determine if the trainer is online.
|
||||
# All requests will be responded to with a JSON {"message": "PONG"}
|
||||
# Routed to /ping
|
||||
|
@ -52,6 +56,9 @@ def gradient_update(request):
|
|||
global batch_update_frequency
|
||||
global checkpoint_dir
|
||||
global agents_having_latest_critic
|
||||
global epsilon
|
||||
global epsilon_step
|
||||
global min_epsilon
|
||||
try:
|
||||
# Check that the request is a POST
|
||||
if (request.method != 'POST'):
|
||||
|
@ -101,6 +108,13 @@ def gradient_update(request):
|
|||
print('Agent {0} has received the latest critic model. Sending only the actor.'.format(request_ip))
|
||||
model_response = rl_model.to_packet(get_target=False)
|
||||
|
||||
epsilon -= epsilon_step
|
||||
epsilon = max(epsilon, min_epsilon)
|
||||
|
||||
print('Sending epsilon of {0} to {1}'.format(epsilon, request_ip))
|
||||
|
||||
model_response['epsilon'] = epsilon
|
||||
|
||||
# Send the response to the agent.
|
||||
return JsonResponse(model_response)
|
||||
finally:
|
||||
|
|
|
@ -0,0 +1,16 @@
|
|||
import os

# Run this script from within an anaconda virtual environment to install the required packages
# Be sure to run this script as root or as administrator.

# Shell commands, executed one after another in the order listed.
# Optional step that is intentionally left disabled: 'conda update -n base conda'
INSTALL_COMMANDS = (
    'python -m pip install --upgrade pip',
    'conda install jupyter',
    'pip install matplotlib==2.1.2',
    'pip install image',
    'pip install keras_tqdm',
    'conda install -c conda-forge opencv',
    'pip install msgpack-rpc-python',
    'pip install pandas',
    'pip install numpy',
    'conda install scipy',
)

# Keep going after a failure (best effort, as before), but remember which
# commands did not succeed so the user is told about them at the end.
failed_commands = []
for command in INSTALL_COMMANDS:
    # os.system returns the status reported for the command; 0 means success.
    status = os.system(command)
    if status != 0:
        failed_commands.append((command, status))

if failed_commands:
    print('The following install commands did not complete successfully:')
    for command, status in failed_commands:
        print('  {0} (status {1})'.format(command, status))
|
Загрузка…
Ссылка в новой задаче