diff --git a/android/pylib/base_test_sharder.py b/android/pylib/base_test_sharder.py
index 530676915..b8d03c830 100644
--- a/android/pylib/base_test_sharder.py
+++ b/android/pylib/base_test_sharder.py
@@ -7,6 +7,7 @@ import android_commands
 import logging
 import multiprocessing
 
+from android_commands import errors
 from test_result import TestResults
 
 
@@ -43,7 +44,9 @@ class BaseTestSharder(object):
 
   def __init__(self, attached_devices):
     self.attached_devices = attached_devices
-    self.retries = 1
+    # Worst case: one device drops offline on every run, so we may need to
+    # retry until we run out of devices.
+    self.retries = len(self.attached_devices)
     self.tests = []
 
   def CreateShardedTestRunner(self, device, index):
@@ -83,12 +86,20 @@ class BaseTestSharder(object):
       logging.warning('Try %d of %d', retry + 1, self.retries)
       self.SetupSharding(self.tests)
       test_runners = []
-      for index, device in enumerate(self.attached_devices):
-        logging.warning('*' * 80)
-        logging.warning('Creating shard %d for %s', index, device)
-        logging.warning('*' * 80)
-        test_runner = self.CreateShardedTestRunner(device, index)
-        test_runners += [test_runner]
+
+      # Try to create N shards, retrying on failure.
+      try:
+        for index, device in enumerate(self.attached_devices):
+          logging.warning('*' * 80)
+          logging.warning('Creating shard %d for %s', index, device)
+          logging.warning('*' * 80)
+          test_runner = self.CreateShardedTestRunner(device, index)
+          test_runners += [test_runner]
+      except errors.DeviceUnresponsiveError as e:
+        logging.critical('****Failed to create a shard: [%s]', e)
+        self.attached_devices.remove(device)
+        continue
+
       logging.warning('Starting...')
       pool = multiprocessing.Pool(len(self.attached_devices),
                                   SetTestsContainer,
@@ -96,8 +107,12 @@ class BaseTestSharder(object):
       # map can't handle KeyboardInterrupt exception. It's a python bug.
       # So use map_async instead.
       async_results = pool.map_async(_ShardedTestRunnable, test_runners)
-      results_lists = async_results.get(999999)
-
+      try:
+        results_lists = async_results.get(999999)
+      except errors.DeviceUnresponsiveError as e:
+        logging.critical('****Failed to run test: [%s]', e)
+        self.attached_devices = android_commands.GetAttachedDevices()
+        continue
       test_results = TestResults.FromTestResults(results_lists)
       # Re-check the attached devices for some devices may
       # become offline
@@ -119,5 +134,9 @@ class BaseTestSharder(object):
           self.tests += [t.name]
         if not self.tests:
           break
+    else:
+      # We ran out of retries, possibly out of healthy devices.
+      # There's no recovery at this point.
+      raise Exception('Unrecoverable error while retrying test runs.')
     self.OnTestsCompleted(test_runners, final_results)
     return final_results
diff --git a/android/run_tests.py b/android/run_tests.py
index 1d58daff9..6b0ca7eda 100755
--- a/android/run_tests.py
+++ b/android/run_tests.py
@@ -209,25 +209,47 @@ class TestSharder(BaseTestSharder):
     self.log_dump_name = log_dump_name
     self.fast_and_loose = fast_and_loose
     self.build_type = build_type
-    test = SingleTestRunner(self.attached_devices[0], test_suite, gtest_filter,
-                            test_arguments, timeout, rebaseline,
-                            performance_test, cleanup_test_files, tool, 0,
-                            not not self.log_dump_name, fast_and_loose,
-                            build_type)
     self.tests = []
     if not self.gtest_filter:
       # No filter has been specified, let's add all tests then.
-      # The executable/apk needs to be copied before we can call GetAllTests.
-      test.test_package.StripAndCopyExecutable()
-      all_tests = test.test_package.GetAllTests()
-      if not rebaseline:
-        disabled_list = test.GetDisabledTests()
-        # Only includes tests that do not have any match in the disabled list.
-        all_tests = filter(lambda t:
-                           not any([fnmatch.fnmatch(t, disabled_pattern)
-                                    for disabled_pattern in disabled_list]),
-                           all_tests)
-      self.tests = all_tests
+      self.tests, self.attached_devices = self._GetTests()
+
+  def _GetTests(self):
+    """Returns a tuple of (all_tests, available_devices).
+
+    Tries each device in turn, last one first, until one yields the tests;
+    devices that fail are dropped. Raises Exception if all devices failed.
+    """
+    available_devices = list(self.attached_devices)
+    while available_devices:
+      try:
+        logging.info('Obtaining tests from %s', available_devices[-1])
+        all_tests = self._GetTestsFromDevice(available_devices[-1])
+        return all_tests, available_devices
+      except Exception as e:
+        logging.info('Failed obtaining tests from %s %s',
+                     available_devices[-1], e)
+        available_devices.pop()
+    raise Exception('No device available to get the list of tests.')
+
+  def _GetTestsFromDevice(self, device):
+    test = SingleTestRunner(device, self.test_suite, self.gtest_filter,
+                            self.test_arguments, self.timeout, self.rebaseline,
+                            self.performance_test, self.cleanup_test_files,
+                            self.tool, 0,
+                            not not self.log_dump_name, self.fast_and_loose,
+                            self.build_type)
+    # The executable/apk needs to be copied before we can call GetAllTests.
+    test.test_package.StripAndCopyExecutable()
+    all_tests = test.test_package.GetAllTests()
+    if not self.rebaseline:
+      disabled_list = test.GetDisabledTests()
+      # Only includes tests that do not have any match in the disabled list.
+      all_tests = filter(lambda t:
+                         not any([fnmatch.fnmatch(t, disabled_pattern)
+                                  for disabled_pattern in disabled_list]),
+                         all_tests)
+    return all_tests
 
   def CreateShardedTestRunner(self, device, index):
     """Creates a suite-specific test runner.