Adding DQN test for Linux cntk35 only for the moment

This commit is contained in:
Morgan Funtowicz 2017-04-12 15:18:21 +01:00
Parent 1b81d21551
Commit 99202bd927
2 changed files: 53 additions and 1 deletion

View file

@@ -22,5 +22,5 @@ dependencies:
 - sphinx==1.5.4
 - sphinx-rtd-theme==0.2.4
 - twine==1.8.1
-- gym==0.5.2
+- gym['atari']==0.8.1
 - pydot-ng==1.0.0

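As a quick sanity check of the dependency bump (a sketch, not part of this commit), the environment the new test relies on can be constructed once gym 0.8.1 with the atari extra is installed:

import gym

env = gym.make('Pong-v3')     # the Atari environment used by the test added below
print(env.action_space.n)     # number of discrete actions available to the agent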
View file

@@ -0,0 +1,52 @@
# ==============================================================================
# Copyright (c) Microsoft. All rights reserved.
# Licensed under the MIT license. See LICENSE.md file in the project root
# for full license information.
# ==============================================================================
import numpy as np
import os
import sys
import platform
import pytest
import gym
abs_path = os.path.dirname(os.path.abspath(__file__))
sys.path.append(abs_path)
sys.path.append(os.path.join(abs_path, "..", "..", "..", "..", "Examples", "ReinforcementLearning"))
if platform.system() != 'Linux':
    pytest.skip('test only run on Linux (Gym Atari dependency)')

# DeepQAgent and as_ale_input come from the DQN example directory appended to
# sys.path above; the module name is assumed here to be the example's
# deepqlearning module.
from deepqlearning import DeepQAgent, as_ale_input

# 1. Make environment:
ENV_NAME = 'Pong-v3'
env = gym.make(ENV_NAME)
# 2. Make agent
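# The (4, 84, 84) shape follows the usual DQN preprocessing of 4 stacked 84x84
# frames; train_after=100 and monitor=False are assumed to mean "start replay
# training after 100 observed transitions" and "do not wrap the env in a monitor".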
agent = DeepQAgent((4, 84, 84), env.action_space.n, train_after=100, monitor=False)
# Train
current_step = 0
max_steps = 1000
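# as_ale_input is assumed to convert the raw gym frame into the ALE-style
# observation the agent expects (grayscale, resized to 84x84).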
current_state = as_ale_input(env.reset())
while current_step < max_steps:
    action = agent.act(current_state)
    new_state, reward, done, _ = env.step(action)
    new_state = as_ale_input(new_state)

    # Clipping reward for training stability
    reward = np.clip(reward, -1, 1)

    agent.observe(current_state, action, reward, done)
    agent.train()

    current_state = new_state
    if done:
        current_state = as_ale_input(env.reset())

    current_step += 1
assert len(agent._memory) == 1000
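# With max_steps == 1000 and one observe() call per step, the agent's replay
# buffer (the example's internal _memory) is expected to hold exactly 1000
# transitions, which is what the assertion above checks.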