Bug 1299216 - Enhance handling of crashes in Marionette. r=automatedtester

There are currently various issues in Marionette which cause extra and unnecessary logging output
in case of chrome and content crashes. Especially the latter ones are not easy to discover. This
patch makes the following improvements:

* Correctly mark process crashes as IOError test failure. Until now we have only used the "Process
  has been closed" message, which does not correctly cover it, especially not if the process shutdown without
  a crash.

* Allow changing the socket_timeout for the socket client. This was mainly necessary to allow unit tests
  to run faster.

* Collect the number of crashes so it's known even later if the process has been crashed during the
  current session. It also fixes the case when check_for_crash() gets called twice, and for the
  second time False is returned which will trigger an invalid code path.

* Reduce code duplication when destroying a session.

* Adding a unit test to verify the correct behavior for chrome and content crashes.

MozReview-Commit-ID: KdUQuJqFRli

--HG--
extra : rebase_source : a2a0b0269ff84679acc6698c72ec8dc45b1dfe52
This commit is contained in:
Henrik Skupin 2016-11-02 13:49:32 +01:00
Родитель 0baa0a9c53
Коммит 590cdec86a
12 изменённых файлов: 273 добавлений и 62 удалений

Просмотреть файл

@ -33,31 +33,34 @@ def do_process_check(func, always=False):
def check_for_crash(): def check_for_crash():
try: try:
m.check_for_crash() return m.check_for_crash()
except: except Exception:
# don't want to lose the original exception # don't want to lose the original exception
traceback.print_exc() traceback.print_exc()
return False
try: try:
return func(*args, **kwargs) return func(*args, **kwargs)
except (MarionetteException, IOError) as e: except (MarionetteException, IOError) as e:
exc, val, tb = sys.exc_info() exc, val, tb = sys.exc_info()
crashed = False
# In case of no Marionette failures ensure to check for possible crashes. # In case of no Marionette failures ensure to check for possible crashes.
# Do it before checking for port disconnects, to avoid reporting of unrelated # Do it before checking for port disconnects, to avoid reporting of unrelated
# crashes due to a forced shutdown of the application. # crashes due to a forced shutdown of the application.
if not isinstance(e, MarionetteException) or type(e) is MarionetteException: if not isinstance(e, MarionetteException) or type(e) is MarionetteException:
if not always: if not always:
check_for_crash() crashed = check_for_crash()
# In case of socket failures force a shutdown of the application # In case of socket failures force a shutdown of the application
if type(e) in (socket.error, socket.timeout): if type(e) in (socket.error, socket.timeout) or crashed:
m.force_shutdown() m.handle_socket_failure(crashed)
raise exc, val, tb raise exc, val, tb
finally: finally:
if always: if always:
check_for_crash(m) check_for_crash()
return _ return _

Просмотреть файл

@ -540,6 +540,7 @@ class Marionette(object):
CONTEXT_CHROME = 'chrome' # non-browser content: windows, dialogs, etc. CONTEXT_CHROME = 'chrome' # non-browser content: windows, dialogs, etc.
CONTEXT_CONTENT = 'content' # browser content: iframes, divs, etc. CONTEXT_CONTENT = 'content' # browser content: iframes, divs, etc.
DEFAULT_CRASH_TIMEOUT = 10
DEFAULT_SOCKET_TIMEOUT = 60 DEFAULT_SOCKET_TIMEOUT = 60
DEFAULT_STARTUP_TIMEOUT = 120 DEFAULT_STARTUP_TIMEOUT = 120
DEFAULT_SHUTDOWN_TIMEOUT = 65 # Firefox will kill hanging threads after 60s DEFAULT_SHUTDOWN_TIMEOUT = 65 # Firefox will kill hanging threads after 60s
@ -572,6 +573,7 @@ class Marionette(object):
self._test_name = None self._test_name = None
self.timeout = timeout self.timeout = timeout
self.socket_timeout = socket_timeout self.socket_timeout = socket_timeout
self.crashed = 0
startup_timeout = startup_timeout or self.DEFAULT_STARTUP_TIMEOUT startup_timeout = startup_timeout or self.DEFAULT_STARTUP_TIMEOUT
if self.bin: if self.bin:
@ -621,7 +623,6 @@ class Marionette(object):
# hit an exception/died or the connection died. We can # hit an exception/died or the connection died. We can
# do no further server-side cleanup in this case. # do no further server-side cleanup in this case.
pass pass
self.session = None
if self.instance: if self.instance:
self.instance.close() self.instance.close()
@ -721,11 +722,8 @@ class Marionette(object):
else: else:
msg = self.client.request(name, params) msg = self.client.request(name, params)
except socket.timeout: except IOError:
self.session = None self.delete_session(send_request=False)
self.window = None
self.client.close()
raise raise
res, err = msg.result, msg.error res, err = msg.result, msg.error
@ -783,36 +781,54 @@ class Marionette(object):
self.set_page_load_timeout(30000) self.set_page_load_timeout(30000)
def check_for_crash(self): def check_for_crash(self):
returncode = None """Check if the process crashed.
name = None
crashed = False :returns: True, if a crash happened since the method has been called the last time.
"""
crash_count = 0
if self.instance: if self.instance:
if self.instance.runner.check_for_crashes( name = self.test_name or 'marionette.py'
test_name=self.test_name or os.path.basename(__file__)): crash_count = self.instance.runner.check_for_crashes(test_name=name)
crashed = True self.crashed = self.crashed + crash_count
if returncode is not None:
print ('PROCESS-CRASH | {0} | abnormal termination with exit code {1}'
.format(name, returncode))
return crashed
def force_shutdown(self): return crash_count > 0
"""Force a shutdown of the running instance.
If we've launched the binary we are connected to, wait for it to shut down. def handle_socket_failure(self, crashed=False):
In the case when it doesn't happen, force its shut down. """Handle socket failures for the currently running instance.
:param crashed: Optional flag which indicates that the process has been crashed,
and no further socket checks have to be performed. Defaults to False.
If the application crashed then clean-up internal states, or in case of a content
crash also kill the process. If there are other reasons for a socket failure,
wait for the process to shutdown itself, or force kill it.
""" """
if self.instance: if self.instance:
exc, val, tb = sys.exc_info() exc, val, tb = sys.exc_info()
# Give the application some time to shutdown # If the content process crashed, Marionette forces the application to shutdown.
returncode = self.instance.runner.wait(timeout=self.DEFAULT_SHUTDOWN_TIMEOUT) if crashed:
if returncode is None: returncode = self.instance.runner.wait(timeout=self.DEFAULT_CRASH_TIMEOUT)
self.cleanup()
message = ('Process killed because the connection to Marionette server is lost.' if returncode == 0:
' Check gecko.log for errors') message = 'Content process crashed'
else:
message = 'Process crashed (Exit code: {returncode})'
self.delete_session(send_request=False, reset_session_id=True)
else: else:
message = 'Process has been closed (Exit code: {returncode})' # Somehow the socket disconnected. Give the application some time to shutdown
# itself before killing the process.
returncode = self.instance.runner.wait(timeout=self.DEFAULT_SHUTDOWN_TIMEOUT)
if returncode is None:
self.quit()
message = ('Process killed because the connection to Marionette server is '
'lost. Check gecko.log for errors')
else:
message = 'Process has been closed (Exit code: {returncode})'
self.delete_session(send_request=False, reset_session_id=True)
if exc: if exc:
message += ' (Reason: {reason})' message += ' (Reason: {reason})'
@ -1135,12 +1151,14 @@ class Marionette(object):
callback() callback()
else: else:
self._request_in_app_shutdown() self._request_in_app_shutdown()
self.delete_session(in_app=True)
# Ensure to explicitely mark the session as deleted
self.delete_session(send_request=False, reset_session_id=True)
# Give the application some time to shutdown # Give the application some time to shutdown
self.instance.runner.wait(timeout=self.DEFAULT_SHUTDOWN_TIMEOUT) self.instance.runner.wait(timeout=self.DEFAULT_SHUTDOWN_TIMEOUT)
else: else:
self.delete_session() self.delete_session(reset_session_id=True)
self.instance.close() self.instance.close()
@do_process_check @do_process_check
@ -1174,7 +1192,9 @@ class Marionette(object):
callback() callback()
else: else:
self._request_in_app_shutdown("eRestart") self._request_in_app_shutdown("eRestart")
self.delete_session(in_app=True)
# Ensure to explicitely mark the session as deleted
self.delete_session(send_request=False, reset_session_id=True)
try: try:
self.raise_for_port() self.raise_for_port()
@ -1221,7 +1241,11 @@ class Marionette(object):
:param session_id: unique identifier for the session. If no session id is :param session_id: unique identifier for the session. If no session id is
passed in then one will be generated by the marionette server. passed in then one will be generated by the marionette server.
:returns: A dict of the capabilities offered.""" :returns: A dict of the capabilities offered.
"""
self.crashed = 0
if self.instance: if self.instance:
returncode = self.instance.runner.returncode returncode = self.instance.runner.returncode
if returncode is not None: if returncode is not None:
@ -1255,19 +1279,25 @@ class Marionette(object):
self._send_message("setTestName", {"value": test_name}) self._send_message("setTestName", {"value": test_name})
self._test_name = test_name self._test_name = test_name
def delete_session(self, in_app=False): def delete_session(self, send_request=True, reset_session_id=False):
"""Close the current session and disconnect from the server. """Close the current session and disconnect from the server.
:param in_app: False, if the session should be closed from the client. :param send_request: Optional, if `True` a request to close the session on
Otherwise a request to quit or restart the instance from the server side will be send. Use `False` in case of eg. in_app restart()
within the application itself is used. or quit(), which trigger a deletion themselves. Defaults to `True`.
:param reset_session_id: Optional, if `True` the current session id will
be reset, which will require an explicit call to `start_session()` before
the test can continue. Defaults to `False`.
""" """
if not in_app: try:
self._send_message("deleteSession") if send_request:
self.session_id = None self._send_message("deleteSession")
self.session = None finally:
self.window = None if reset_session_id:
self.client.close() self.session_id = None
self.session = None
self.window = None
self.client.close()
@property @property
def session_capabilities(self): def session_capabilities(self):

Просмотреть файл

@ -125,7 +125,7 @@ class TcpTransport(object):
""" """
self.addr = addr self.addr = addr
self.port = port self.port = port
self.socket_timeout = socket_timeout self._socket_timeout = socket_timeout
self.protocol = 1 self.protocol = 1
self.application_type = None self.application_type = None
@ -133,6 +133,16 @@ class TcpTransport(object):
self.expected_response = None self.expected_response = None
self.sock = None self.sock = None
@property
def socket_timeout(self):
return self._socket_timeout
@socket_timeout.setter
def socket_timeout(self, value):
if self.sock:
self.sock.settimeout(value)
self._socket_timeout = value
def _unmarshal(self, packet): def _unmarshal(self, packet):
msg = None msg = None

Просмотреть файл

@ -1 +1 @@
mozrunner >= 6.12 mozrunner >= 6.13

Просмотреть файл

@ -9,6 +9,7 @@ from .marionette_test import (
expectedFailure, expectedFailure,
MarionetteJSTestCase, MarionetteJSTestCase,
MarionetteTestCase, MarionetteTestCase,
run_if_e10s,
skip, skip,
skip_if_chrome, skip_if_chrome,
skip_if_desktop, skip_if_desktop,

Просмотреть файл

@ -8,6 +8,7 @@ __version__ = '3.1.0'
from .decorators import ( from .decorators import (
expectedFailure, expectedFailure,
parameterized, parameterized,
run_if_e10s,
skip, skip,
skip_if_chrome, skip_if_chrome,
skip_if_desktop, skip_if_desktop,

Просмотреть файл

@ -61,6 +61,23 @@ def parameterized(func_suffix, *args, **kwargs):
return wrapped return wrapped
def run_if_e10s(target):
"""Decorator which runs a test if e10s mode is active."""
def wrapper(self, *args, **kwargs):
with self.marionette.using_context('chrome'):
multi_process_browser = self.marionette.execute_script("""
try {
return Services.appinfo.browserTabsRemoteAutostart;
} catch (e) {
return false;
}""")
if not multi_process_browser:
raise SkipTest('skipping due to e10s is disabled')
return target(self, *args, **kwargs)
return wrapper
def skip(reason): def skip(reason):
"""Decorator which unconditionally skips a test.""" """Decorator which unconditionally skips a test."""
def decorator(test_item): def decorator(test_item):

Просмотреть файл

@ -6,7 +6,6 @@ import imp
import os import os
import re import re
import sys import sys
import socket
import time import time
import types import types
import unittest import unittest
@ -274,13 +273,9 @@ class CommonTestCase(unittest.TestCase):
self.loglines = [['Error getting log: {}'.format(inst)]] self.loglines = [['Error getting log: {}'.format(inst)]]
try: try:
self.marionette.delete_session() self.marionette.delete_session()
except (socket.error, MarionetteException, IOError): except IOError:
# Gecko has crashed? # Gecko has crashed?
self.marionette.session = None pass
try:
self.marionette.client.close()
except socket.error:
pass
self.marionette = None self.marionette = None
def setup_SpecialPowers_observer(self): def setup_SpecialPowers_observer(self):
@ -484,7 +479,7 @@ class MarionetteTestCase(CommonTestCase):
if not self.marionette.session: if not self.marionette.session:
self.marionette.start_session() self.marionette.start_session()
if not self.marionette.check_for_crash(): if not self.marionette.crashed:
try: try:
self.marionette.clear_imported_scripts() self.marionette.clear_imported_scripts()
self.marionette.execute_script("log('TEST-END: {0}:{1}')" self.marionette.execute_script("log('TEST-END: {0}:{1}')"

Просмотреть файл

@ -23,7 +23,6 @@ import mozprofile
from manifestparser import TestManifest from manifestparser import TestManifest
from manifestparser.filters import tags from manifestparser.filters import tags
from marionette_driver.marionette import Marionette from marionette_driver.marionette import Marionette
from mozlog import get_default_logger
from moztest.adapters.unit import StructuredTestRunner, StructuredTestResult from moztest.adapters.unit import StructuredTestRunner, StructuredTestResult
from moztest.results import TestResultCollection, TestResult, relevant_line from moztest.results import TestResultCollection, TestResult, relevant_line
import mozversion import mozversion
@ -568,9 +567,8 @@ class BaseMarionetteTestRunner(object):
rv['screenshot'] = marionette.screenshot() rv['screenshot'] = marionette.screenshot()
with marionette.using_context(marionette.CONTEXT_CONTENT): with marionette.using_context(marionette.CONTEXT_CONTENT):
rv['source'] = marionette.page_source rv['source'] = marionette.page_source
except Exception: except Exception as exc:
logger = get_default_logger() self.logger.warning('Failed to gather test failure debug: {}'.format(exc))
logger.warning('Failed to gather test failure debug.', exc_info=True)
return rv return rv
self.result_callbacks.append(gather_debug) self.result_callbacks.append(gather_debug)

Просмотреть файл

@ -0,0 +1,155 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
import glob
import shutil
from marionette import MarionetteTestCase, expectedFailure, run_if_e10s
from marionette_driver.errors import MarionetteException
# Import runner module to monkey patch mozcrash module
from mozrunner.base import runner
class MockMozCrash(object):
"""Mock object to replace original mozcrash methods."""
def __init__(self, marionette):
self.marionette = marionette
with self.marionette.using_context('chrome'):
self.crash_reporter_enabled = self.marionette.execute_script("""
try {
Components.classes["@mozilla.org/toolkit/crash-reporter;1"].
getService(Components.interfaces.nsICrashReporter);
return true;
} catch (exc) {
return false;
}
""")
def check_for_crashes(self, dump_directory, *args, **kwargs):
minidump_files = glob.glob('{}/*.dmp'.format(dump_directory))
shutil.rmtree(dump_directory, ignore_errors=True)
if self.crash_reporter_enabled:
return len(minidump_files)
else:
return len(minidump_files) == 0
def log_crashes(self, logger, dump_directory, *args, **kwargs):
return self.check_for_crashes(dump_directory, *args, **kwargs)
class BaseCrashTestCase(MarionetteTestCase):
# Reduce the timeout for faster processing of the tests
socket_timeout = 10
def setUp(self):
super(BaseCrashTestCase, self).setUp()
self.mozcrash_mock = MockMozCrash(self.marionette)
self.crash_count = self.marionette.crashed
self.pid = self.marionette.session["processId"]
self.remote_uri = self.marionette.absolute_url("javascriptPage.html")
def tearDown(self):
self.marionette.crashed = self.crash_count
super(BaseCrashTestCase, self).tearDown()
def crash(self, chrome=True):
context = 'chrome' if chrome else 'content'
sandbox = None if chrome else 'system'
# Monkey patch mozcrash to avoid crash info output only for our triggered crashes.
mozcrash = runner.mozcrash
runner.mozcrash = self.mozcrash_mock
socket_timeout = self.marionette.client.socket_timeout
self.marionette.set_context(context)
try:
self.marionette.client.socket_timeout = self.socket_timeout
self.marionette.execute_script("""
// Copied from crash me simple
Components.utils.import("resource://gre/modules/ctypes.jsm");
// ctypes checks for NULL pointer derefs, so just go near-NULL.
var zero = new ctypes.intptr_t(8);
var badptr = ctypes.cast(zero, ctypes.PointerType(ctypes.int32_t));
var crash = badptr.contents;
""", sandbox=sandbox)
finally:
runner.mozcrash = mozcrash
self.marionette.client.socket_timeout = socket_timeout
class TestCrash(BaseCrashTestCase):
def test_crash_chrome_process(self):
self.assertRaisesRegexp(IOError, 'Process crashed',
self.crash, chrome=True)
self.assertEqual(self.marionette.crashed, 1)
self.assertIsNone(self.marionette.session)
self.assertRaisesRegexp(MarionetteException, 'Please start a session',
self.marionette.get_url)
self.marionette.start_session()
self.assertNotEqual(self.marionette.session['processId'], self.pid)
# TODO: Bug 1314594 - Causes a hang for the communication between the
# chrome and frame script.
# self.marionette.get_url()
@run_if_e10s
def test_crash_content_process(self):
# If e10s is disabled the chrome process crashes
self.marionette.navigate(self.remote_uri)
self.assertRaisesRegexp(IOError, 'Content process crashed',
self.crash, chrome=False)
self.assertEqual(self.marionette.crashed, 1)
self.assertIsNone(self.marionette.session)
self.assertRaisesRegexp(MarionetteException, 'Please start a session',
self.marionette.get_url)
self.marionette.start_session()
self.assertNotEqual(self.marionette.session['processId'], self.pid)
self.marionette.get_url()
@expectedFailure
def test_unexpected_crash(self):
self.crash(chrome=True)
class TestCrashInSetUp(BaseCrashTestCase):
def setUp(self):
super(TestCrashInSetUp, self).setUp()
self.assertRaisesRegexp(IOError, 'Process crashed',
self.crash, chrome=True)
self.assertEqual(self.marionette.crashed, 1)
self.assertIsNone(self.marionette.session)
def test_crash_in_setup(self):
self.marionette.start_session()
self.assertNotEqual(self.marionette.session['processId'], self.pid)
class TestCrashInTearDown(BaseCrashTestCase):
def tearDown(self):
try:
self.assertRaisesRegexp(IOError, 'Process crashed',
self.crash, chrome=True)
finally:
self.assertEqual(self.marionette.crashed, 1)
self.assertIsNone(self.marionette.session)
super(TestCrashInTearDown, self).tearDown()
def test_crash_in_teardown(self):
pass

Просмотреть файл

@ -140,4 +140,5 @@ skip-if = buildapp == 'b2g' || appname == 'fennec'
[test_addons.py] [test_addons.py]
[test_select.py] [test_select.py]
[test_crash.py]
[test_httpd.py] [test_httpd.py]

Просмотреть файл

@ -4,7 +4,7 @@ manifestparser >= 1.1
wptserve >= 1.3.0 wptserve >= 1.3.0
mozinfo >= 0.8 mozinfo >= 0.8
mozprocess >= 0.9 mozprocess >= 0.9
mozrunner >= 6.9 mozrunner >= 6.13
mozdevice >= 0.44 mozdevice >= 0.44
mozlog >= 3.0 mozlog >= 3.0
moznetwork >= 0.21 moznetwork >= 0.21