Bug 1299216 - Enhance handling of crashes in Marionette. r=automatedtester

There are currently various issues in Marionette which cause extra and unnecessary logging output
in case of chrome and content crashes. Especially the latter ones are not easy to discover. This
patch makes the following improvements:

* Correctly mark process crashes as IOError test failure. Until now we have only used the "Process
  has been closed" message, which does not correctly cover it, especially not if the process shutdown without
  a crash.

* Allow changing the socket_timeout for the socket client. This was mainly necessary to allow unit tests
  to run faster.

* Collect the number of crashes so it's known even later if the process has been crashed during the
  current session. It also fixes the case when check_for_crash() gets called twice, and for the
  second time False is returned which will trigger an invalid code path.

* Reduce code duplication when destroying a session.

* Adding a unit test to verify the correct behavior for chrome and content crashes.

MozReview-Commit-ID: KdUQuJqFRli

--HG--
extra : rebase_source : a2a0b0269ff84679acc6698c72ec8dc45b1dfe52
This commit is contained in:
Henrik Skupin 2016-11-02 13:49:32 +01:00
Родитель 0baa0a9c53
Коммит 590cdec86a
12 изменённых файлов: 273 добавлений и 62 удалений

Просмотреть файл

@ -33,31 +33,34 @@ def do_process_check(func, always=False):
def check_for_crash():
try:
m.check_for_crash()
except:
return m.check_for_crash()
except Exception:
# don't want to lose the original exception
traceback.print_exc()
return False
try:
return func(*args, **kwargs)
except (MarionetteException, IOError) as e:
exc, val, tb = sys.exc_info()
crashed = False
# In case of no Marionette failures ensure to check for possible crashes.
# Do it before checking for port disconnects, to avoid reporting of unrelated
# crashes due to a forced shutdown of the application.
if not isinstance(e, MarionetteException) or type(e) is MarionetteException:
if not always:
check_for_crash()
crashed = check_for_crash()
# In case of socket failures force a shutdown of the application
if type(e) in (socket.error, socket.timeout):
m.force_shutdown()
if type(e) in (socket.error, socket.timeout) or crashed:
m.handle_socket_failure(crashed)
raise exc, val, tb
finally:
if always:
check_for_crash(m)
check_for_crash()
return _

Просмотреть файл

@ -540,6 +540,7 @@ class Marionette(object):
CONTEXT_CHROME = 'chrome' # non-browser content: windows, dialogs, etc.
CONTEXT_CONTENT = 'content' # browser content: iframes, divs, etc.
DEFAULT_CRASH_TIMEOUT = 10
DEFAULT_SOCKET_TIMEOUT = 60
DEFAULT_STARTUP_TIMEOUT = 120
DEFAULT_SHUTDOWN_TIMEOUT = 65 # Firefox will kill hanging threads after 60s
@ -572,6 +573,7 @@ class Marionette(object):
self._test_name = None
self.timeout = timeout
self.socket_timeout = socket_timeout
self.crashed = 0
startup_timeout = startup_timeout or self.DEFAULT_STARTUP_TIMEOUT
if self.bin:
@ -621,7 +623,6 @@ class Marionette(object):
# hit an exception/died or the connection died. We can
# do no further server-side cleanup in this case.
pass
self.session = None
if self.instance:
self.instance.close()
@ -721,11 +722,8 @@ class Marionette(object):
else:
msg = self.client.request(name, params)
except socket.timeout:
self.session = None
self.window = None
self.client.close()
except IOError:
self.delete_session(send_request=False)
raise
res, err = msg.result, msg.error
@ -783,36 +781,54 @@ class Marionette(object):
self.set_page_load_timeout(30000)
def check_for_crash(self):
returncode = None
name = None
crashed = False
"""Check if the process crashed.
:returns: True, if a crash happened since the method has been called the last time.
"""
crash_count = 0
if self.instance:
if self.instance.runner.check_for_crashes(
test_name=self.test_name or os.path.basename(__file__)):
crashed = True
if returncode is not None:
print ('PROCESS-CRASH | {0} | abnormal termination with exit code {1}'
.format(name, returncode))
return crashed
name = self.test_name or 'marionette.py'
crash_count = self.instance.runner.check_for_crashes(test_name=name)
self.crashed = self.crashed + crash_count
def force_shutdown(self):
"""Force a shutdown of the running instance.
return crash_count > 0
If we've launched the binary we are connected to, wait for it to shut down.
In the case when it doesn't happen, force its shut down.
def handle_socket_failure(self, crashed=False):
"""Handle socket failures for the currently running instance.
:param crashed: Optional flag which indicates that the process has been crashed,
and no further socket checks have to be performed. Defaults to False.
If the application crashed then clean-up internal states, or in case of a content
crash also kill the process. If there are other reasons for a socket failure,
wait for the process to shutdown itself, or force kill it.
"""
if self.instance:
exc, val, tb = sys.exc_info()
# Give the application some time to shutdown
returncode = self.instance.runner.wait(timeout=self.DEFAULT_SHUTDOWN_TIMEOUT)
if returncode is None:
self.cleanup()
message = ('Process killed because the connection to Marionette server is lost.'
' Check gecko.log for errors')
# If the content process crashed, Marionette forces the application to shutdown.
if crashed:
returncode = self.instance.runner.wait(timeout=self.DEFAULT_CRASH_TIMEOUT)
if returncode == 0:
message = 'Content process crashed'
else:
message = 'Process crashed (Exit code: {returncode})'
self.delete_session(send_request=False, reset_session_id=True)
else:
message = 'Process has been closed (Exit code: {returncode})'
# Somehow the socket disconnected. Give the application some time to shutdown
# itself before killing the process.
returncode = self.instance.runner.wait(timeout=self.DEFAULT_SHUTDOWN_TIMEOUT)
if returncode is None:
self.quit()
message = ('Process killed because the connection to Marionette server is '
'lost. Check gecko.log for errors')
else:
message = 'Process has been closed (Exit code: {returncode})'
self.delete_session(send_request=False, reset_session_id=True)
if exc:
message += ' (Reason: {reason})'
@ -1135,12 +1151,14 @@ class Marionette(object):
callback()
else:
self._request_in_app_shutdown()
self.delete_session(in_app=True)
# Ensure to explicitely mark the session as deleted
self.delete_session(send_request=False, reset_session_id=True)
# Give the application some time to shutdown
self.instance.runner.wait(timeout=self.DEFAULT_SHUTDOWN_TIMEOUT)
else:
self.delete_session()
self.delete_session(reset_session_id=True)
self.instance.close()
@do_process_check
@ -1174,7 +1192,9 @@ class Marionette(object):
callback()
else:
self._request_in_app_shutdown("eRestart")
self.delete_session(in_app=True)
# Ensure to explicitely mark the session as deleted
self.delete_session(send_request=False, reset_session_id=True)
try:
self.raise_for_port()
@ -1221,7 +1241,11 @@ class Marionette(object):
:param session_id: unique identifier for the session. If no session id is
passed in then one will be generated by the marionette server.
:returns: A dict of the capabilities offered."""
:returns: A dict of the capabilities offered.
"""
self.crashed = 0
if self.instance:
returncode = self.instance.runner.returncode
if returncode is not None:
@ -1255,19 +1279,25 @@ class Marionette(object):
self._send_message("setTestName", {"value": test_name})
self._test_name = test_name
def delete_session(self, in_app=False):
def delete_session(self, send_request=True, reset_session_id=False):
"""Close the current session and disconnect from the server.
:param in_app: False, if the session should be closed from the client.
Otherwise a request to quit or restart the instance from
within the application itself is used.
:param send_request: Optional, if `True` a request to close the session on
the server side will be send. Use `False` in case of eg. in_app restart()
or quit(), which trigger a deletion themselves. Defaults to `True`.
:param reset_session_id: Optional, if `True` the current session id will
be reset, which will require an explicit call to `start_session()` before
the test can continue. Defaults to `False`.
"""
if not in_app:
self._send_message("deleteSession")
self.session_id = None
self.session = None
self.window = None
self.client.close()
try:
if send_request:
self._send_message("deleteSession")
finally:
if reset_session_id:
self.session_id = None
self.session = None
self.window = None
self.client.close()
@property
def session_capabilities(self):

Просмотреть файл

@ -125,7 +125,7 @@ class TcpTransport(object):
"""
self.addr = addr
self.port = port
self.socket_timeout = socket_timeout
self._socket_timeout = socket_timeout
self.protocol = 1
self.application_type = None
@ -133,6 +133,16 @@ class TcpTransport(object):
self.expected_response = None
self.sock = None
@property
def socket_timeout(self):
return self._socket_timeout
@socket_timeout.setter
def socket_timeout(self, value):
if self.sock:
self.sock.settimeout(value)
self._socket_timeout = value
def _unmarshal(self, packet):
msg = None

Просмотреть файл

@ -1 +1 @@
mozrunner >= 6.12
mozrunner >= 6.13

Просмотреть файл

@ -9,6 +9,7 @@ from .marionette_test import (
expectedFailure,
MarionetteJSTestCase,
MarionetteTestCase,
run_if_e10s,
skip,
skip_if_chrome,
skip_if_desktop,

Просмотреть файл

@ -8,6 +8,7 @@ __version__ = '3.1.0'
from .decorators import (
expectedFailure,
parameterized,
run_if_e10s,
skip,
skip_if_chrome,
skip_if_desktop,

Просмотреть файл

@ -61,6 +61,23 @@ def parameterized(func_suffix, *args, **kwargs):
return wrapped
def run_if_e10s(target):
"""Decorator which runs a test if e10s mode is active."""
def wrapper(self, *args, **kwargs):
with self.marionette.using_context('chrome'):
multi_process_browser = self.marionette.execute_script("""
try {
return Services.appinfo.browserTabsRemoteAutostart;
} catch (e) {
return false;
}""")
if not multi_process_browser:
raise SkipTest('skipping due to e10s is disabled')
return target(self, *args, **kwargs)
return wrapper
def skip(reason):
"""Decorator which unconditionally skips a test."""
def decorator(test_item):

Просмотреть файл

@ -6,7 +6,6 @@ import imp
import os
import re
import sys
import socket
import time
import types
import unittest
@ -274,13 +273,9 @@ class CommonTestCase(unittest.TestCase):
self.loglines = [['Error getting log: {}'.format(inst)]]
try:
self.marionette.delete_session()
except (socket.error, MarionetteException, IOError):
except IOError:
# Gecko has crashed?
self.marionette.session = None
try:
self.marionette.client.close()
except socket.error:
pass
pass
self.marionette = None
def setup_SpecialPowers_observer(self):
@ -484,7 +479,7 @@ class MarionetteTestCase(CommonTestCase):
if not self.marionette.session:
self.marionette.start_session()
if not self.marionette.check_for_crash():
if not self.marionette.crashed:
try:
self.marionette.clear_imported_scripts()
self.marionette.execute_script("log('TEST-END: {0}:{1}')"

Просмотреть файл

@ -23,7 +23,6 @@ import mozprofile
from manifestparser import TestManifest
from manifestparser.filters import tags
from marionette_driver.marionette import Marionette
from mozlog import get_default_logger
from moztest.adapters.unit import StructuredTestRunner, StructuredTestResult
from moztest.results import TestResultCollection, TestResult, relevant_line
import mozversion
@ -568,9 +567,8 @@ class BaseMarionetteTestRunner(object):
rv['screenshot'] = marionette.screenshot()
with marionette.using_context(marionette.CONTEXT_CONTENT):
rv['source'] = marionette.page_source
except Exception:
logger = get_default_logger()
logger.warning('Failed to gather test failure debug.', exc_info=True)
except Exception as exc:
self.logger.warning('Failed to gather test failure debug: {}'.format(exc))
return rv
self.result_callbacks.append(gather_debug)

Просмотреть файл

@ -0,0 +1,155 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
import glob
import shutil
from marionette import MarionetteTestCase, expectedFailure, run_if_e10s
from marionette_driver.errors import MarionetteException
# Import runner module to monkey patch mozcrash module
from mozrunner.base import runner
class MockMozCrash(object):
"""Mock object to replace original mozcrash methods."""
def __init__(self, marionette):
self.marionette = marionette
with self.marionette.using_context('chrome'):
self.crash_reporter_enabled = self.marionette.execute_script("""
try {
Components.classes["@mozilla.org/toolkit/crash-reporter;1"].
getService(Components.interfaces.nsICrashReporter);
return true;
} catch (exc) {
return false;
}
""")
def check_for_crashes(self, dump_directory, *args, **kwargs):
minidump_files = glob.glob('{}/*.dmp'.format(dump_directory))
shutil.rmtree(dump_directory, ignore_errors=True)
if self.crash_reporter_enabled:
return len(minidump_files)
else:
return len(minidump_files) == 0
def log_crashes(self, logger, dump_directory, *args, **kwargs):
return self.check_for_crashes(dump_directory, *args, **kwargs)
class BaseCrashTestCase(MarionetteTestCase):
# Reduce the timeout for faster processing of the tests
socket_timeout = 10
def setUp(self):
super(BaseCrashTestCase, self).setUp()
self.mozcrash_mock = MockMozCrash(self.marionette)
self.crash_count = self.marionette.crashed
self.pid = self.marionette.session["processId"]
self.remote_uri = self.marionette.absolute_url("javascriptPage.html")
def tearDown(self):
self.marionette.crashed = self.crash_count
super(BaseCrashTestCase, self).tearDown()
def crash(self, chrome=True):
context = 'chrome' if chrome else 'content'
sandbox = None if chrome else 'system'
# Monkey patch mozcrash to avoid crash info output only for our triggered crashes.
mozcrash = runner.mozcrash
runner.mozcrash = self.mozcrash_mock
socket_timeout = self.marionette.client.socket_timeout
self.marionette.set_context(context)
try:
self.marionette.client.socket_timeout = self.socket_timeout
self.marionette.execute_script("""
// Copied from crash me simple
Components.utils.import("resource://gre/modules/ctypes.jsm");
// ctypes checks for NULL pointer derefs, so just go near-NULL.
var zero = new ctypes.intptr_t(8);
var badptr = ctypes.cast(zero, ctypes.PointerType(ctypes.int32_t));
var crash = badptr.contents;
""", sandbox=sandbox)
finally:
runner.mozcrash = mozcrash
self.marionette.client.socket_timeout = socket_timeout
class TestCrash(BaseCrashTestCase):
def test_crash_chrome_process(self):
self.assertRaisesRegexp(IOError, 'Process crashed',
self.crash, chrome=True)
self.assertEqual(self.marionette.crashed, 1)
self.assertIsNone(self.marionette.session)
self.assertRaisesRegexp(MarionetteException, 'Please start a session',
self.marionette.get_url)
self.marionette.start_session()
self.assertNotEqual(self.marionette.session['processId'], self.pid)
# TODO: Bug 1314594 - Causes a hang for the communication between the
# chrome and frame script.
# self.marionette.get_url()
@run_if_e10s
def test_crash_content_process(self):
# If e10s is disabled the chrome process crashes
self.marionette.navigate(self.remote_uri)
self.assertRaisesRegexp(IOError, 'Content process crashed',
self.crash, chrome=False)
self.assertEqual(self.marionette.crashed, 1)
self.assertIsNone(self.marionette.session)
self.assertRaisesRegexp(MarionetteException, 'Please start a session',
self.marionette.get_url)
self.marionette.start_session()
self.assertNotEqual(self.marionette.session['processId'], self.pid)
self.marionette.get_url()
@expectedFailure
def test_unexpected_crash(self):
self.crash(chrome=True)
class TestCrashInSetUp(BaseCrashTestCase):
def setUp(self):
super(TestCrashInSetUp, self).setUp()
self.assertRaisesRegexp(IOError, 'Process crashed',
self.crash, chrome=True)
self.assertEqual(self.marionette.crashed, 1)
self.assertIsNone(self.marionette.session)
def test_crash_in_setup(self):
self.marionette.start_session()
self.assertNotEqual(self.marionette.session['processId'], self.pid)
class TestCrashInTearDown(BaseCrashTestCase):
def tearDown(self):
try:
self.assertRaisesRegexp(IOError, 'Process crashed',
self.crash, chrome=True)
finally:
self.assertEqual(self.marionette.crashed, 1)
self.assertIsNone(self.marionette.session)
super(TestCrashInTearDown, self).tearDown()
def test_crash_in_teardown(self):
pass

Просмотреть файл

@ -140,4 +140,5 @@ skip-if = buildapp == 'b2g' || appname == 'fennec'
[test_addons.py]
[test_select.py]
[test_crash.py]
[test_httpd.py]

Просмотреть файл

@ -4,7 +4,7 @@ manifestparser >= 1.1
wptserve >= 1.3.0
mozinfo >= 0.8
mozprocess >= 0.9
mozrunner >= 6.9
mozrunner >= 6.13
mozdevice >= 0.44
mozlog >= 3.0
moznetwork >= 0.21