Usability fixes. Move epsilon reduction to trainer in distributed RL training.

2018-03-18 17:59:19 -07:00 · 2018-03-18 17:59:19 -07:00 · 4d0ff77b24
--- a/AirSimE2EDeepLearning/DataExplorationAndPreparation.ipynb
+++ b/AirSimE2EDeepLearning/DataExplorationAndPreparation.ipynb
--- a/AirSimE2EDeepLearning/InstallPackages.py
+++ b/AirSimE2EDeepLearning/InstallPackages.py
@ -0,0 +1,16 @@
+import os
+
+# Run this script from within an anaconda virtual environment to install the required packages
+# Be sure to run this script as root or as administrator. Exit codes of the commands below are not checked, so review the console output for failures.
+
+os.system('python -m pip install --upgrade pip')
+#os.system('conda update -n base conda')
+os.system('conda install jupyter')
+os.system('pip install matplotlib==2.1.2')
+os.system('pip install image')
+os.system('pip install keras_tqdm')
+os.system('conda install -c conda-forge opencv')
+os.system('pip install msgpack-rpc-python')
+os.system('pip install pandas')
+os.system('pip install numpy')
+os.system('conda install scipy')
--- a/AirSimE2EDeepLearning/README.md
+++ b/AirSimE2EDeepLearning/README.md
@ -45,6 +45,16 @@ You should also be comfortable with Python. At the very least, you should be abl
 3. [Install h5py](http://docs.h5py.org/en/latest/build.html)
 4. [Install Keras](https://keras.io/#installation) and [configure the Keras backend](https://keras.io/backend/) to work with TensorFlow (default) or CNTK.
 5. [Install AzCopy](https://docs.microsoft.com/en-us/azure/storage/common/storage-use-azcopy). Be sure to add the location for the AzCopy executable to your system path.
+6. Install the other dependencies. From your anaconda environment, run "InstallPackages.py" as root or administrator. This installs the following packages into your environment:
+    * jupyter
+    * matplotlib v. 2.1.2
+    * image
+    * keras_tqdm
+    * opencv
+    * msgpack-rpc-python
+    * pandas
+    * numpy
+    * scipy

 ### Simulator Package

--- a/AirSimE2EDeepLearning/TestModel.ipynb
+++ b/AirSimE2EDeepLearning/TestModel.ipynb
@ -13,22 +13,45 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "collapsed": true
-   },
-   "outputs": [],
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Using TensorFlow backend.\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Using model model/models\\model_model.02-0.0216478.h5 for testing.\n"
+     ]
+    }
+   ],
   "source": [
    "from keras.models import load_model\n",
    "import sys\n",
    "import numpy as np\n",
+    "import glob\n",
+    "import os\n",
    "\n",
    "if ('../../PythonClient/' not in sys.path):\n",
    "    sys.path.insert(0, '../../PythonClient/')\n",
    "from AirSimClient import *\n",
    "\n",
    "# << Set this to the path of the model >>\n",
-    "MODEL_PATH = 'model/models/sample_model.h5'"
+    "# If None, the most recently created .h5 file in model/models is used (presumably the best checkpoint from training - verify)\n",
+    "MODEL_PATH = None\n",
+    "\n",
+    "if (MODEL_PATH is None):\n",
+    "    models = glob.glob('model/models/*.h5') \n",
+    "    best_model = max(models, key=os.path.getctime)\n",
+    "    MODEL_PATH = best_model\n",
+    "    \n",
+    "print('Using model {0} for testing.'.format(MODEL_PATH))"
   ]
  },
  {
@ -40,16 +63,15 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {
-    "collapsed": false
-   },
+   "execution_count": 3,
+   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "Waiting for connection: \n"
+      "Waiting for connection: \n",
+      "Connection established!\n"
     ]
    }
   ],
@ -59,7 +81,8 @@
    "client = CarClient()\n",
    "client.confirmConnection()\n",
    "client.enableApiControl(True)\n",
-    "car_controls = CarControls()"
+    "car_controls = CarControls()\n",
+    "print('Connection established!')"
   ]
  },
  {
@ -71,10 +94,8 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 3,
-   "metadata": {
-    "collapsed": true
-   },
+   "execution_count": 4,
+   "metadata": {},
   "outputs": [],
   "source": [
    "car_controls.steering = 0\n",
@ -94,10 +115,8 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 4,
-   "metadata": {
-    "collapsed": true
-   },
+   "execution_count": 5,
+   "metadata": {},
   "outputs": [],
   "source": [
    "def get_image():\n",
@ -118,9 +137,7 @@
  {
   "cell_type": "code",
   "execution_count": null,
-   "metadata": {
-    "collapsed": true
-   },
+   "metadata": {},
   "outputs": [],
   "source": [
    "while (True):\n",
@ -189,7 +206,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.6.0"
+   "version": "3.6.4"
  }
 },
 "nbformat": 4,
--- a/AirSimE2EDeepLearning/TrainModel.ipynb
+++ b/AirSimE2EDeepLearning/TrainModel.ipynb
--- a/DistributedRL/LaunchLocalTrainingJob.ipynb
+++ b/DistributedRL/LaunchLocalTrainingJob.ipynb
@ -11,10 +11,8 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 3,
-   "metadata": {
-    "collapsed": true
-   },
+   "execution_count": 1,
+   "metadata": {},
   "outputs": [],
   "source": [
    "import os"
@ -34,28 +32,30 @@
    "* **replay_memory_size**: The number of examples to keep in the replay memory. The replay memory is a FIFO buffer used to reduce the effects of nearby states being correlated. Minibatches are generated from randomly selecting examples from the replay memory.\n",
    "* **weights_path**: If we are doing transfer learning and using pretrained weights for the model, they will be loaded from this path.\n",
    "* **train_conv_layers**: If we are using pretrained weights, we may prefer to freeze the convolutional layers to speed up training.\n",
-    "* **airsim_path**: The path to the .ps1 to start AirSim. This path cannot contain spaces.\n",
-    "* **data_dir**: The path to the directory containing the road_points.txt and reward_points.txt used to compute the reward function. This path cannot contain spaces."
+    "* **airsim_path**: The path to the folder containing the .ps1 to start AirSim. This path cannot contain spaces.\n",
+    "* **data_dir**: The path to the directory containing the road_points.txt and reward_points.txt used to compute the reward function. This path cannot contain spaces.\n",
+    "* **experiment_name**: A unique identifier for this experiment"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 4,
-   "metadata": {
-    "collapsed": true
-   },
+   "execution_count": 8,
+   "metadata": {},
   "outputs": [],
   "source": [
-    "batch_update_frequency = 300\n",
+    "#batch_update_frequency = 300\n",
+    "batch_update_frequency = 10\n",
    "max_epoch_runtime_sec = 30\n",
    "per_iter_epsilon_reduction=0.003\n",
    "min_epsilon = 0.1\n",
    "batch_size = 32\n",
-    "replay_memory_size = 2000\n",
-    "weights_path = 'D:\\\\data\\\\pretrain_model_weights.h5'\n",
+    "#replay_memory_size = 2000\n",
+    "replay_memory_size = 50\n",
+    "weights_path = os.path.join(os.getcwd(), 'Share\\\\data\\\\pretrain_model_weights.h5')\n",
    "train_conv_layers = 'false'\n",
-    "airsim_path = 'D:\\\\AD_Cookbook_AirSim\\\\'\n",
-    "data_dir = os.path.join(os.getcwd(), 'Share')"
+    "airsim_path = 'E:\\\\AD_Cookbook_AirSim\\\\'\n",
+    "data_dir = os.path.join(os.getcwd(), 'Share')\n",
+    "experiment_name = 'local_run'"
   ]
  },
  {
@ -67,10 +67,8 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 11,
-   "metadata": {
-    "collapsed": true
-   },
+   "execution_count": 9,
+   "metadata": {},
   "outputs": [],
   "source": [
    "train_cmd = 'python distributed_agent.py'\n",
@ -84,6 +82,8 @@
    "train_cmd += ' train_conv_layers={0}'.format(train_conv_layers)\n",
    "train_cmd += ' airsim_path={0}'.format(airsim_path)\n",
    "train_cmd += ' data_dir={0}'.format(data_dir)\n",
+    "train_cmd += ' experiment_name={0}'.format(experiment_name)\n",
+    "train_cmd += ' local_run=true'\n",
    "\n",
    "with open(os.path.join(os.getcwd(), 'Share/scripts_downpour/app/train.bat'), 'w') as f:\n",
    "    f.write(train_cmd)"
@ -113,7 +113,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.6.0"
+   "version": "3.6.4"
  }
 },
 "nbformat": 4,
--- a/DistributedRL/LaunchTrainingJob.ipynb
+++ b/DistributedRL/LaunchTrainingJob.ipynb
@ -126,9 +126,7 @@
  {
   "cell_type": "code",
   "execution_count": 7,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
@ -144,7 +142,7 @@
    "# Trainer task\n",
    "tasks.append(batchmodels.TaskAddParameter(\n",
    "        id='TrainerTask',\n",
-    "        command_line=r'call C:\\\\prereq\\\\mount.bat && C:\\\\ProgramData\\\\Anaconda3\\\\Scripts\\\\activate.bat py36 && python -u Z:\\\\scripts_downpour\\\\manage.py runserver 0.0.0.0:80 data_dir=Z:\\\\\\\\ role=trainer experiment_name={0} batch_update_frequency={1} weights_path={2} train_conv_layers={3}'.format(job_id, batch_update_frequency, weights_path, train_conv_layers),\n",
+    "        command_line=r'call C:\\\\prereq\\\\mount.bat && C:\\\\ProgramData\\\\Anaconda3\\\\Scripts\\\\activate.bat py36 && python -u Z:\\\\scripts_downpour\\\\manage.py runserver 0.0.0.0:80 data_dir=Z:\\\\\\\\ role=trainer experiment_name={0} batch_update_frequency={1} weights_path={2} train_conv_layers={3} per_iter_epsilon_reduction={4} min_epsilon={5}'.format(job_id, batch_update_frequency, weights_path, train_conv_layers, per_iter_epsilon_reduction, min_epsilon),\n",
    "        display_name='Trainer',\n",
    "        user_identity=batchmodels.UserIdentity(user_name=NOTEBOOK_CONFIG['batch_job_user_name']),\n",
    "        multi_instance_settings = batchmodels.MultiInstanceSettings(number_of_instances=1, coordination_command_line='cls')\n",
@ -198,7 +196,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.6.0"
+   "version": "3.6.4"
  }
 },
 "nbformat": 4,
--- a/DistributedRL/README.md
+++ b/DistributedRL/README.md
@ -34,13 +34,23 @@ It is also highly recommended that the reader has familiarity with the AirSim si

 #### Environment Setup

-2. [Install Anaconda](https://conda.io/docs/user-guide/install/index.html) with Python 3.5 or higher.
-3. [Install Tensorflow](https://www.tensorflow.org/install/install_windows)
-4. [Install h5py](http://docs.h5py.org/en/latest/build.html)
+1. [Install Anaconda](https://conda.io/docs/user-guide/install/index.html) with Python 3.5 or higher.
+2. [Install Tensorflow](https://www.tensorflow.org/install/install_windows)
+3. [Install h5py](http://docs.h5py.org/en/latest/build.html)
 4. [Install Keras](https://keras.io/#installation)
-7. [Install AzCopy](https://docs.microsoft.com/en-us/azure/storage/common/storage-use-azcopy). Be sure to add the location for the AzCopy executable to your system path.
-8. [Install the latest verison of Azure Powershell](https://docs.microsoft.com/en-us/powershell/azure/install-azurerm-ps?view=azurermps-5.3.0).
-9. [Install the latest version of the Azure CLI](https://docs.microsoft.com/en-us/cli/azure/install-azure-cli?view=azure-cli-latest).
+5. [Install AzCopy](https://docs.microsoft.com/en-us/azure/storage/common/storage-use-azcopy). Be sure to add the location for the AzCopy executable to your system path.
+6. [Install the latest version of Azure PowerShell](https://docs.microsoft.com/en-us/powershell/azure/install-azurerm-ps?view=azurermps-5.3.0).
+7. [Install the latest version of the Azure CLI](https://docs.microsoft.com/en-us/cli/azure/install-azure-cli?view=azure-cli-latest).
+8. Install the other dependencies. From your anaconda environment, run "InstallPackages.py" as root or administrator. This installs the following packages into your environment:
+    * jupyter
+    * matplotlib v. 2.1.2
+    * image
+    * keras_tqdm
+    * opencv
+    * msgpack-rpc-python
+    * pandas
+    * numpy
+    * scipy

 #### Simulator Package

--- a/DistributedRL/RunModel.ipynb
+++ b/DistributedRL/RunModel.ipynb
@ -13,10 +13,8 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 22,
-   "metadata": {
-    "collapsed": true
-   },
+   "execution_count": 7,
+   "metadata": {},
   "outputs": [],
   "source": [
    "from Share.scripts_downpour.app.airsim_client import *\n",
@ -41,10 +39,9 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 17,
+   "execution_count": 8,
   "metadata": {
-    "collapsed": false,
-    "scrolled": true
+    "scrolled": false
   },
   "outputs": [
    {
@ -54,27 +51,27 @@
      "_________________________________________________________________\n",
      "Layer (type)                 Output Shape              Param #   \n",
      "=================================================================\n",
-      "input_4 (InputLayer)         (None, 59, 255, 3)        0         \n",
+      "input_3 (InputLayer)         (None, 59, 255, 3)        0         \n",
      "_________________________________________________________________\n",
      "convolution0 (Conv2D)        (None, 59, 255, 16)       448       \n",
      "_________________________________________________________________\n",
-      "max_pooling2d_10 (MaxPooling (None, 29, 127, 16)       0         \n",
+      "max_pooling2d_7 (MaxPooling2 (None, 29, 127, 16)       0         \n",
      "_________________________________________________________________\n",
      "convolution1 (Conv2D)        (None, 29, 127, 32)       4640      \n",
      "_________________________________________________________________\n",
-      "max_pooling2d_11 (MaxPooling (None, 14, 63, 32)        0         \n",
+      "max_pooling2d_8 (MaxPooling2 (None, 14, 63, 32)        0         \n",
      "_________________________________________________________________\n",
      "convolution2 (Conv2D)        (None, 14, 63, 32)        9248      \n",
      "_________________________________________________________________\n",
-      "max_pooling2d_12 (MaxPooling (None, 7, 31, 32)         0         \n",
+      "max_pooling2d_9 (MaxPooling2 (None, 7, 31, 32)         0         \n",
      "_________________________________________________________________\n",
-      "flatten_4 (Flatten)          (None, 6944)              0         \n",
+      "flatten_3 (Flatten)          (None, 6944)              0         \n",
      "_________________________________________________________________\n",
-      "dropout_7 (Dropout)          (None, 6944)              0         \n",
+      "dropout_5 (Dropout)          (None, 6944)              0         \n",
      "_________________________________________________________________\n",
      "rl_dense (Dense)             (None, 128)               888960    \n",
      "_________________________________________________________________\n",
-      "dropout_8 (Dropout)          (None, 128)               0         \n",
+      "dropout_6 (Dropout)          (None, 128)               0         \n",
      "_________________________________________________________________\n",
      "rl_output (Dense)            (None, 5)                 645       \n",
      "=================================================================\n",
@ -102,11 +99,19 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "collapsed": true
-   },
-   "outputs": [],
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Connecting to AirSim...\n",
+      "Waiting for connection: \n",
+      "Connected!\n"
+     ]
+    }
+   ],
   "source": [
    "print('Connecting to AirSim...')\n",
    "car_client = CarClient()\n",
@ -125,10 +130,8 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 20,
-   "metadata": {
-    "collapsed": true
-   },
+   "execution_count": 10,
+   "metadata": {},
   "outputs": [],
   "source": [
    "def get_image(car_client):\n",
@ -149,9 +152,7 @@
  {
   "cell_type": "code",
   "execution_count": null,
-   "metadata": {
-    "collapsed": true
-   },
+   "metadata": {},
   "outputs": [],
   "source": [
    "def append_to_ring_buffer(item, buffer, buffer_size):\n",
@ -214,7 +215,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.6.0"
+   "version": "3.6.4"
  }
 },
 "nbformat": 4,
--- a/DistributedRL/Share/scripts_downpour/app/distributed_agent.py
+++ b/DistributedRL/Share/scripts_downpour/app/distributed_agent.py
@ -13,6 +13,7 @@ import sys
 import requests
 import PIL
 import copy
+import datetime

 # A class that represents the agent that will drive the vehicle, train the model, and send the gradient updates to the trainer.
 class DistributedAgent():
@ -24,6 +25,8 @@ class DistributedAgent():

        parameters['role_type'] = 'agent'

+        
+        print('Starting time: {0}'.format(datetime.datetime.utcnow()), file=sys.stderr)
        self.__model_buffer = None
        self.__model = None
        self.__airsim_started = False
@ -280,9 +283,12 @@ class DistributedAgent():
            #       This constraint is so the model doesn't end up having to churn through huge chunks of data, slowing down training
            # 4) The car has run off the road
            if (collision_info.has_collided or car_state.speed < 2 or utc_now > end_time or far_off):
+                print('Start time: {0}, end time: {1}'.format(start_time, utc_now), file=sys.stderr)
                if (utc_now > end_time):
                    print('timed out.')
+                    print('Full autonomous run finished at {0}'.format(utc_now), file=sys.stderr)
                done = True
+                sys.stderr.flush()
            else:

                # The Agent should occasionally pick random action instead of best action
@ -338,6 +344,7 @@ class DistributedAgent():
        print('Num total actions: {0}'.format(len(actions)))
        
        # If we are in the main loop, reduce the epsilon parameter so that the model will be called more often
+        # Note: this will be overwritten by the trainer's epsilon if running in distributed mode
        if not always_random:
            self.__epsilon -= self.__per_iter_epsilon_reduction
            self.__epsilon = max(self.__epsilon, self.__min_epsilon)
@ -399,12 +406,21 @@ class DistributedAgent():
            post_data['gradients'] = gradients
            post_data['batch_count'] = batches_count
            
-            new_model_parameters = requests.post('http://{0}:80/gradient_update'.format(self.__trainer_ip_address), json=post_data)
-            print('New params:')
-            print(new_model_parameters)
+            response = requests.post('http://{0}:80/gradient_update'.format(self.__trainer_ip_address), json=post_data)
+            print('Response:')
+            print(response)
+
+            new_model_parameters = response.json()
            
            # Update the existing model with the new parameters
-            self.__model.from_packet(new_model_parameters.json())
+            self.__model.from_packet(new_model_parameters)
+            
+            # If the trainer sends us an epsilon value, allow it to override our local value
+            if ('epsilon' in new_model_parameters):
+                new_epsilon = float(new_model_parameters['epsilon'])
+                print('Overriding local epsilon with {0}, which was sent from trainer'.format(new_epsilon))
+                self.__epsilon = new_epsilon
+                
        else:
            if (self.__num_batches_run > self.__batch_update_frequency + self.__last_checkpoint_batch_count):
                self.__model.update_critic()
--- a/DistributedRL/Share/scripts_downpour/app/train.bat
+++ b/DistributedRL/Share/scripts_downpour/app/train.bat
@ -0,0 +1 @@
+python distributed_agent.py batch_update_frequency=10 max_epoch_runtime_sec=30 per_iter_epsilon_reduction=0.003 min_epsilon=0.1 batch_size=32 replay_memory_size=50 weights_path=F:\ADCookbookUsabilityFixes\AutonomousDrivingCookbook\DistributedRL\Share\data\pretrain_model_weights.h5 train_conv_layers=false airsim_path=E:\AD_Cookbook_AirSim\ data_dir=F:\ADCookbookUsabilityFixes\AutonomousDrivingCookbook\DistributedRL\Share experiment_name=local_run local_run=true
--- a/DistributedRL/Share/scripts_downpour/app/views.py
+++ b/DistributedRL/Share/scripts_downpour/app/views.py
@ -27,6 +27,10 @@ next_batch_update_count = 0
 checkpoint_dir = ''
 agents_having_latest_critic = []

+min_epsilon = float(os.environ['min_epsilon'])
+epsilon_step = float(os.environ['per_iter_epsilon_reduction'])
+epsilon = 1.0
+
 # A simple endpoint that can be used to determine if the trainer is online.
 # All requests will be responded to with a JSON {"message": "PONG"}
 # Routed to /ping
@ -52,6 +56,9 @@ def gradient_update(request):
    global batch_update_frequency
    global checkpoint_dir
    global agents_having_latest_critic
+    global epsilon
+    global epsilon_step
+    global min_epsilon
    try:
        # Check that the request is a POST
        if (request.method != 'POST'):
@ -101,6 +108,13 @@ def gradient_update(request):
                print('Agent {0} has received the latest critic model. Sending only the actor.'.format(request_ip))
                model_response = rl_model.to_packet(get_target=False)

+            epsilon -= epsilon_step
+            epsilon = max(epsilon, min_epsilon)
+            
+            print('Sending epsilon of {0} to {1}'.format(epsilon, request_ip))
+            
+            model_response['epsilon'] = epsilon
+                
            # Send the response to the agent.
            return JsonResponse(model_response)
    finally:
--- a/InstallPackages.py
+++ b/InstallPackages.py
@ -0,0 +1,16 @@
+import os
+
+# Run this script from within an anaconda virtual environment to install the required packages
+# Be sure to run this script as root or as administrator. Exit codes of the commands below are not checked, so review the console output for failures.
+
+os.system('python -m pip install --upgrade pip')
+#os.system('conda update -n base conda')
+os.system('conda install jupyter')
+os.system('pip install matplotlib==2.1.2')
+os.system('pip install image')
+os.system('pip install keras_tqdm')
+os.system('conda install -c conda-forge opencv')
+os.system('pip install msgpack-rpc-python')
+os.system('pip install pandas')
+os.system('pip install numpy')
+os.system('conda install scipy')
				`@ -0,0 +1 @@`
				`python distributed_agent.py batch_update_frequency=10 max_epoch_runtime_sec=30 per_iter_epsilon_reduction=0.003 min_epsilon=0.1 batch_size=32 replay_memory_size=50 weights_path=F:\ADCookbookUsabilityFixes\AutonomousDrivingCookbook\DistributedRL\Share\data\pretrain_model_weights.h5 train_conv_layers=false airsim_path=E:\AD_Cookbook_AirSim\ data_dir=F:\ADCookbookUsabilityFixes\AutonomousDrivingCookbook\DistributedRL\Share experiment_name=local_run local_run=true`