Usability fixes. Move epsilon reduction to trainer in distributed RL training.

pYr0rAGE 2018-03-18 17:59:19 -07:00
Parent d6ce104e76
Commit 4d0ff77b24
13 changed files with 249 additions and 178 deletions

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,16 @@
import os
# Run this script from within an anaconda virtual environment to install the required packages
# Be sure to run this script as root or as administrator.
os.system('python -m pip install --upgrade pip')
#os.system('conda update -n base conda')
os.system('conda install jupyter')
os.system('pip install matplotlib==2.1.2')
os.system('pip install image')
os.system('pip install keras_tqdm')
os.system('conda install -c conda-forge opencv')
os.system('pip install msgpack-rpc-python')
os.system('pip install pandas')
os.system('pip install numpy')
os.system('conda install scipy')
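A hedged alternative sketch for the pip lines above (same package list assumed): invoking pip through sys.executable guarantees the packages land in the interpreter of the currently active anaconda environment rather than whichever pip happens to be first on the PATH.

import subprocess
import sys

# Packages installed via pip in InstallPackages.py above.
PIP_PACKAGES = ['matplotlib==2.1.2', 'image', 'keras_tqdm',
                'msgpack-rpc-python', 'pandas', 'numpy']

for package in PIP_PACKAGES:
    # sys.executable is the python of the active environment.
    subprocess.check_call([sys.executable, '-m', 'pip', 'install', package])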

View File

@ -45,6 +45,16 @@ You should also be comfortable with Python. At the very least, you should be abl
3. [Install h5py](http://docs.h5py.org/en/latest/build.html)
4. [Install Keras](https://keras.io/#installation) and [configure the Keras backend](https://keras.io/backend/) to work with TensorFlow (default) or CNTK.
5. [Install AzCopy](https://docs.microsoft.com/en-us/azure/storage/common/storage-use-azcopy). Be sure to add the location for the AzCopy executable to your system path.
6. Install the other dependencies. From your anaconda environment, run "InstallPackages.py" as root or administrator (a quick import sanity check is sketched after this list). This installs the following packages into your environment:
* jupyter
* matplotlib v. 2.1.2
* image
* keras_tqdm
* opencv
* msgpack-rpc-python
* pandas
* numpy
* scipy
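A quick sanity check, sketched below under the assumption that it is run inside the same anaconda environment, confirms the dependencies import cleanly (opencv imports as cv2, msgpack-rpc-python as msgpackrpc, and the image package pulls in PIL):

import importlib

# Import names for the packages installed by InstallPackages.py.
MODULES = ['matplotlib', 'PIL', 'keras_tqdm', 'cv2', 'msgpackrpc',
           'pandas', 'numpy', 'scipy', 'h5py', 'keras']

for module_name in MODULES:
    try:
        importlib.import_module(module_name)
        print('OK      {0}'.format(module_name))
    except ImportError as error:
        print('MISSING {0}: {1}'.format(module_name, error))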
### Simulator Package

View File

@ -13,22 +13,45 @@
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Using TensorFlow backend.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Using model model/models\\model_model.02-0.0216478.h5 for testing.\n"
]
}
],
"source": [
"from keras.models import load_model\n",
"import sys\n",
"import numpy as np\n",
"import glob\n",
"import os\n",
"\n",
"if ('../../PythonClient/' not in sys.path):\n",
" sys.path.insert(0, '../../PythonClient/')\n",
"from AirSimClient import *\n",
"\n",
"# << Set this to the path of the model >>\n",
"MODEL_PATH = 'model/models/sample_model.h5'"
"# If None, then the model with the lowest validation loss from training will be used\n",
"MODEL_PATH = None\n",
"\n",
"if (MODEL_PATH == None):\n",
" models = glob.glob('model/models/*.h5') \n",
" best_model = max(models, key=os.path.getctime)\n",
" MODEL_PATH = best_model\n",
" \n",
"print('Using model {0} for testing.'.format(best_model))"
]
},
{
@ -40,16 +63,15 @@
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": false
},
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Waiting for connection: \n"
"Waiting for connection: \n",
"Connection established!\n"
]
}
],
@ -59,7 +81,8 @@
"client = CarClient()\n",
"client.confirmConnection()\n",
"client.enableApiControl(True)\n",
"car_controls = CarControls()"
"car_controls = CarControls()\n",
"print('Connection established!')"
]
},
{
@ -71,10 +94,8 @@
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": true
},
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"car_controls.steering = 0\n",
@ -94,10 +115,8 @@
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": true
},
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"def get_image():\n",
@ -118,9 +137,7 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"while (True):\n",
@ -189,7 +206,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.0"
"version": "3.6.4"
}
},
"nbformat": 4,

File diff suppressed because one or more lines are too long

View File

@ -11,10 +11,8 @@
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": true
},
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import os"
@ -34,28 +32,30 @@
"* **replay_memory_size**: The number of examples to keep in the replay memory. The replay memory is a FIFO buffer used to reduce the effects of nearby states being correlated. Minibatches are generated from randomly selecting examples from the replay memory.\n",
"* **weights_path**: If we are doing transfer learning and using pretrained weights for the model, they will be loaded from this path.\n",
"* **train_conv_layers**: If we are using pretrained weights, we may prefer to freeze the convolutional layers to speed up training.\n",
"* **airsim_path**: The path to the .ps1 to start AirSim. This path cannot contain spaces.\n",
"* **data_dir**: The path to the directory containing the road_points.txt and reward_points.txt used to compute the reward function. This path cannot contain spaces."
"* **airsim_path**: The path to the folder containing the .ps1 to start AirSim. This path cannot contain spaces.\n",
"* **data_dir**: The path to the directory containing the road_points.txt and reward_points.txt used to compute the reward function. This path cannot contain spaces.\n",
"* **experiment_name**: A unique identifier for this experiment"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": true
},
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"batch_update_frequency = 300\n",
"#batch_update_frequency = 300\n",
"batch_update_frequency = 10\n",
"max_epoch_runtime_sec = 30\n",
"per_iter_epsilon_reduction=0.003\n",
"min_epsilon = 0.1\n",
"batch_size = 32\n",
"replay_memory_size = 2000\n",
"weights_path = 'D:\\\\data\\\\pretrain_model_weights.h5'\n",
"#replay_memory_size = 2000\n",
"replay_memory_size = 50\n",
"weights_path = os.path.join(os.getcwd(), 'Share\\\\data\\\\pretrain_model_weights.h5')\n",
"train_conv_layers = 'false'\n",
"airsim_path = 'D:\\\\AD_Cookbook_AirSim\\\\'\n",
"data_dir = os.path.join(os.getcwd(), 'Share')"
"airsim_path = 'E:\\\\AD_Cookbook_AirSim\\\\'\n",
"data_dir = os.path.join(os.getcwd(), 'Share')\n",
"experiment_name = 'local_run'"
]
},
{
@ -67,10 +67,8 @@
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"collapsed": true
},
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"train_cmd = 'python distributed_agent.py'\n",
@ -84,6 +82,8 @@
"train_cmd += ' train_conv_layers={0}'.format(train_conv_layers)\n",
"train_cmd += ' airsim_path={0}'.format(airsim_path)\n",
"train_cmd += ' data_dir={0}'.format(data_dir)\n",
"train_cmd += ' experiment_name={0}'.format(experiment_name)\n",
"train_cmd += ' local_run=true'\n",
"\n",
"with open(os.path.join(os.getcwd(), 'Share/scripts_downpour/app/train.bat'), 'w') as f:\n",
" f.write(train_cmd)"
@ -113,7 +113,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.0"
"version": "3.6.4"
}
},
"nbformat": 4,

View File

@ -126,9 +126,7 @@
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"collapsed": false
},
"metadata": {},
"outputs": [
{
"name": "stdout",
@ -144,7 +142,7 @@
"# Trainer task\n",
"tasks.append(batchmodels.TaskAddParameter(\n",
" id='TrainerTask',\n",
" command_line=r'call C:\\\\prereq\\\\mount.bat && C:\\\\ProgramData\\\\Anaconda3\\\\Scripts\\\\activate.bat py36 && python -u Z:\\\\scripts_downpour\\\\manage.py runserver 0.0.0.0:80 data_dir=Z:\\\\\\\\ role=trainer experiment_name={0} batch_update_frequency={1} weights_path={2} train_conv_layers={3}'.format(job_id, batch_update_frequency, weights_path, train_conv_layers),\n",
" command_line=r'call C:\\\\prereq\\\\mount.bat && C:\\\\ProgramData\\\\Anaconda3\\\\Scripts\\\\activate.bat py36 && python -u Z:\\\\scripts_downpour\\\\manage.py runserver 0.0.0.0:80 data_dir=Z:\\\\\\\\ role=trainer experiment_name={0} batch_update_frequency={1} weights_path={2} train_conv_layers={3} per_iter_epsilon_reduction={4} min_epsilon={5}'.format(job_id, batch_update_frequency, weights_path, train_conv_layers, per_iter_epsilon_reduction, min_epsilon),\n",
" display_name='Trainer',\n",
" user_identity=batchmodels.UserIdentity(user_name=NOTEBOOK_CONFIG['batch_job_user_name']),\n",
" multi_instance_settings = batchmodels.MultiInstanceSettings(number_of_instances=1, coordination_command_line='cls')\n",
@ -198,7 +196,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.0"
"version": "3.6.4"
}
},
"nbformat": 4,

View File

@ -34,13 +34,23 @@ It is also highly recommended that the reader has familiarity with the AirSim si
#### Environment Setup
2. [Install Anaconda](https://conda.io/docs/user-guide/install/index.html) with Python 3.5 or higher.
3. [Install Tensorflow](https://www.tensorflow.org/install/install_windows)
4. [Install h5py](http://docs.h5py.org/en/latest/build.html)
1. [Install Anaconda](https://conda.io/docs/user-guide/install/index.html) with Python 3.5 or higher.
2. [Install Tensorflow](https://www.tensorflow.org/install/install_windows)
3. [Install h5py](http://docs.h5py.org/en/latest/build.html)
4. [Install Keras](https://keras.io/#installation)
7. [Install AzCopy](https://docs.microsoft.com/en-us/azure/storage/common/storage-use-azcopy). Be sure to add the location for the AzCopy executable to your system path.
8. [Install the latest version of Azure PowerShell](https://docs.microsoft.com/en-us/powershell/azure/install-azurerm-ps?view=azurermps-5.3.0).
9. [Install the latest version of the Azure CLI](https://docs.microsoft.com/en-us/cli/azure/install-azure-cli?view=azure-cli-latest).
5. [Install AzCopy](https://docs.microsoft.com/en-us/azure/storage/common/storage-use-azcopy). Be sure to add the location for the AzCopy executable to your system path.
6. [Install the latest version of Azure PowerShell](https://docs.microsoft.com/en-us/powershell/azure/install-azurerm-ps?view=azurermps-5.3.0).
7. [Install the latest version of the Azure CLI](https://docs.microsoft.com/en-us/cli/azure/install-azure-cli?view=azure-cli-latest).
8. Install the other dependencies. From your anaconda environment, run "InstallPackages.py" as root or administrator. This installs the following packages into your environment:
* jupyter
* matplotlib v. 2.1.2
* image
* keras_tqdm
* opencv
* msgpack-rpc-python
* pandas
* numpy
* scipy
#### Simulator Package

View File

@ -13,10 +13,8 @@
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {
"collapsed": true
},
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"from Share.scripts_downpour.app.airsim_client import *\n",
@ -41,10 +39,9 @@
},
{
"cell_type": "code",
"execution_count": 17,
"execution_count": 8,
"metadata": {
"collapsed": false,
"scrolled": true
"scrolled": false
},
"outputs": [
{
@ -54,27 +51,27 @@
"_________________________________________________________________\n",
"Layer (type) Output Shape Param # \n",
"=================================================================\n",
"input_4 (InputLayer) (None, 59, 255, 3) 0 \n",
"input_3 (InputLayer) (None, 59, 255, 3) 0 \n",
"_________________________________________________________________\n",
"convolution0 (Conv2D) (None, 59, 255, 16) 448 \n",
"_________________________________________________________________\n",
"max_pooling2d_10 (MaxPooling (None, 29, 127, 16) 0 \n",
"max_pooling2d_7 (MaxPooling2 (None, 29, 127, 16) 0 \n",
"_________________________________________________________________\n",
"convolution1 (Conv2D) (None, 29, 127, 32) 4640 \n",
"_________________________________________________________________\n",
"max_pooling2d_11 (MaxPooling (None, 14, 63, 32) 0 \n",
"max_pooling2d_8 (MaxPooling2 (None, 14, 63, 32) 0 \n",
"_________________________________________________________________\n",
"convolution2 (Conv2D) (None, 14, 63, 32) 9248 \n",
"_________________________________________________________________\n",
"max_pooling2d_12 (MaxPooling (None, 7, 31, 32) 0 \n",
"max_pooling2d_9 (MaxPooling2 (None, 7, 31, 32) 0 \n",
"_________________________________________________________________\n",
"flatten_4 (Flatten) (None, 6944) 0 \n",
"flatten_3 (Flatten) (None, 6944) 0 \n",
"_________________________________________________________________\n",
"dropout_7 (Dropout) (None, 6944) 0 \n",
"dropout_5 (Dropout) (None, 6944) 0 \n",
"_________________________________________________________________\n",
"rl_dense (Dense) (None, 128) 888960 \n",
"_________________________________________________________________\n",
"dropout_8 (Dropout) (None, 128) 0 \n",
"dropout_6 (Dropout) (None, 128) 0 \n",
"_________________________________________________________________\n",
"rl_output (Dense) (None, 5) 645 \n",
"=================================================================\n",
@ -102,11 +99,19 @@
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"execution_count": 9,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Connecting to AirSim...\n",
"Waiting for connection: \n",
"Connected!\n"
]
}
],
"source": [
"print('Connecting to AirSim...')\n",
"car_client = CarClient()\n",
@ -125,10 +130,8 @@
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {
"collapsed": true
},
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"def get_image(car_client):\n",
@ -149,9 +152,7 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"def append_to_ring_buffer(item, buffer, buffer_size):\n",
@ -214,7 +215,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.0"
"version": "3.6.4"
}
},
"nbformat": 4,

View File

@ -13,6 +13,7 @@ import sys
import requests
import PIL
import copy
import datetime
# A class that represents the agent that will drive the vehicle, train the model, and send the gradient updates to the trainer.
class DistributedAgent():
@ -24,6 +25,8 @@ class DistributedAgent():
parameters['role_type'] = 'agent'
print('Starting time: {0}'.format(datetime.datetime.utcnow()), file=sys.stderr)
self.__model_buffer = None
self.__model = None
self.__airsim_started = False
@ -280,9 +283,12 @@ class DistributedAgent():
# This constraint is so the model doesn't end up having to churn through huge chunks of data, slowing down training
# 4) The car has run off the road
if (collision_info.has_collided or car_state.speed < 2 or utc_now > end_time or far_off):
print('Start time: {0}, end time: {1}'.format(start_time, utc_now), file=sys.stderr)
if (utc_now > end_time):
print('timed out.')
print('Full autonomous run finished at {0}'.format(utc_now), file=sys.stderr)
done = True
sys.stderr.flush()
else:
# The Agent should occasionally pick random action instead of best action
@ -338,6 +344,7 @@ class DistributedAgent():
print('Num total actions: {0}'.format(len(actions)))
# If we are in the main loop, reduce the epsilon parameter so that the model will be called more often
# Note: this will be overwritten by the trainer's epsilon if running in distributed mode
if not always_random:
self.__epsilon -= self.__per_iter_epsilon_reduction
self.__epsilon = max(self.__epsilon, self.__min_epsilon)
@ -399,12 +406,21 @@ class DistributedAgent():
post_data['gradients'] = gradients
post_data['batch_count'] = batches_count
new_model_parameters = requests.post('http://{0}:80/gradient_update'.format(self.__trainer_ip_address), json=post_data)
print('New params:')
print(new_model_parameters)
response = requests.post('http://{0}:80/gradient_update'.format(self.__trainer_ip_address), json=post_data)
print('Response:')
print(response)
new_model_parameters = response.json()
# Update the existing model with the new parameters
self.__model.from_packet(new_model_parameters.json())
self.__model.from_packet(new_model_parameters)
# If the trainer sends us an epsilon, allow it to override our local value
if ('epsilon' in new_model_parameters):
new_epsilon = float(new_model_parameters['epsilon'])
print('Overriding local epsilon with {0}, which was sent from trainer'.format(new_epsilon))
self.__epsilon = new_epsilon
else:
if (self.__num_batches_run > self.__batch_update_frequency + self.__last_checkpoint_batch_count):
self.__model.update_critic()
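Earlier in this file's diff, a comment notes that the agent should occasionally pick a random action instead of the best action. The selection code itself is outside these hunks, so the following is only a hedged epsilon-greedy sketch; the action count of 5 matches the rl_output layer in the model summary shown earlier.

import numpy as np

def choose_action(predicted_values, epsilon):
    # With probability epsilon explore with a random action,
    # otherwise exploit the action with the highest predicted value.
    num_actions = len(predicted_values)   # 5 for the rl_output layer
    if np.random.random() < epsilon:
        return np.random.randint(0, num_actions)
    return int(np.argmax(predicted_values))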

View File

@ -0,0 +1 @@
python distributed_agent.py batch_update_frequency=10 max_epoch_runtime_sec=30 per_iter_epsilon_reduction=0.003 min_epsilon=0.1 batch_size=32 replay_memory_size=50 weights_path=F:\ADCookbookUsabilityFixes\AutonomousDrivingCookbook\DistributedRL\Share\data\pretrain_model_weights.h5 train_conv_layers=false airsim_path=E:\AD_Cookbook_AirSim\ data_dir=F:\ADCookbookUsabilityFixes\AutonomousDrivingCookbook\DistributedRL\Share experiment_name=local_run local_run=true
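train.bat launches distributed_agent.py with settings passed as space-separated key=value tokens. How distributed_agent.py parses them is not shown in this diff, so the snippet below is only a hypothetical sketch of such parsing (the parameters dict name mirrors the one assigned in distributed_agent.py above):

import sys

def parse_key_value_args(argv):
    # Fold ['batch_update_frequency=10', 'local_run=true', ...] into a dict.
    parameters = {}
    for arg in argv:
        key, _, value = arg.partition('=')
        parameters[key] = value
    return parameters

parameters = parse_key_value_args(sys.argv[1:])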

View File

@ -27,6 +27,10 @@ next_batch_update_count = 0
checkpoint_dir = ''
agents_having_latest_critic = []
min_epsilon = float(os.environ['min_epsilon'])
epsilon_step = float(os.environ['per_iter_epsilon_reduction'])
epsilon = 1.0
# A simple endpoint that can be used to determine if the trainer is online.
# All requests will be responded to with a JSON {"message": "PONG"}
# Routed to /ping
@ -52,6 +56,9 @@ def gradient_update(request):
global batch_update_frequency
global checkpoint_dir
global agents_having_latest_critic
global epsilon
global epsilon_step
global min_epsilon
try:
# Check that the request is a POST
if (request.method != 'POST'):
@ -101,6 +108,13 @@ def gradient_update(request):
print('Agent {0} has received the latest critic model. Sending only the actor.'.format(request_ip))
model_response = rl_model.to_packet(get_target=False)
epsilon -= epsilon_step
epsilon = max(epsilon, min_epsilon)
print('Sending epsilon of {0} to {1}'.format(epsilon, request_ip))
model_response['epsilon'] = epsilon
# Send the response to the agent.
return JsonResponse(model_response)
finally:
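As a worked check of the trainer-side schedule above: with the local-run values per_iter_epsilon_reduction=0.003 and min_epsilon=0.1, epsilon decays from 1.0 to its floor after (1.0 - 0.1) / 0.003 = 300 gradient updates. A minimal sketch of that schedule, decoupled from the Django view:

def epsilon_after(num_gradient_updates, epsilon_step=0.003,
                  start_epsilon=1.0, min_epsilon=0.1):
    # Linear decay per gradient update, clamped at min_epsilon.
    return max(start_epsilon - epsilon_step * num_gradient_updates, min_epsilon)

print(epsilon_after(100))    # ~0.7
print(epsilon_after(300))    # ~0.1 (floor effectively reached)
print(epsilon_after(1000))   # 0.1 (clamped at min_epsilon)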

InstallPackages.py (new file, 16 lines)
View File

@ -0,0 +1,16 @@
import os
# Run this script from within an anaconda virtual environment to install the required packages
# Be sure to run this script as root or as administrator.
os.system('python -m pip install --upgrade pip')
#os.system('conda update -n base conda')
os.system('conda install jupyter')
os.system('pip install matplotlib==2.1.2')
os.system('pip install image')
os.system('pip install keras_tqdm')
os.system('conda install -c conda-forge opencv')
os.system('pip install msgpack-rpc-python')
os.system('pip install pandas')
os.system('pip install numpy')
os.system('conda install scipy')