Add a new rule to automatically file bugs for new actionable crashes (#2117)

Co-authored-by: Marco Castelluccio <mcastelluccio@mozilla.com>
This commit is contained in:
Suhaib Mujahid 2023-06-28 06:05:56 -04:00 коммит произвёл GitHub
Родитель 671432e422
Коммит 3e3829d021
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
8 изменённых файлов: 1386 добавлений и 1 удалений

Просмотреть файл

@@ -15,7 +15,7 @@ class MyConfig(config.Config):
def __init__(self):
super(MyConfig, self).__init__()
if not os.path.exists(MyConfig.PATH):
self.conf = {"bz_api_key": "", "bz_api_key_nomail": ""}
self.conf = {"bz_api_key": "", "bz_api_key_nomail": "", "socorro_token": ""}
else:
with open(MyConfig.PATH) as In:
self.conf = json.load(In)
@@ -28,12 +28,18 @@ class MyConfig(config.Config):
"Your config.json file must contain a Bugzilla token for an account that doesn't trigger bugmail (for testing, you can use the same token as bz_api_key)"
)
if "socorro_token" not in self.conf:
raise Exception("Your config.json file must contain a Socorro token")
    def get(self, section, option, default=None, type=str):
        """Return the configured value for (section, option).

        NOTE(review): the `type` parameter is unused here (and shadows the
        builtin); presumably kept for interface compatibility with the base
        config class — confirm before removing.
        """
        if section == "Bugzilla":
            if option == "token":
                return self.conf["bz_api_key"]
            if option == "nomail-token":
                return self.conf["bz_api_key_nomail"]
        elif section == "Socorro":
            if option == "token":
                return self.conf["socorro_token"]
        elif section == "User-Agent":
            # Single fixed value for this section, regardless of `option`.
            return "bugbot"
        # Unknown section/option combinations fall back to the default.
        return default

0
bugbot/crash/__init__.py Normal file
Просмотреть файл

738
bugbot/crash/analyzer.py Normal file
Просмотреть файл

@@ -0,0 +1,738 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
import itertools
import re
from collections import defaultdict
from datetime import date, timedelta
from functools import cached_property
from typing import Iterable, Iterator
from libmozdata import bugzilla, clouseau, connection, socorro
from libmozdata import utils as lmdutils
from libmozdata.bugzilla import Bugzilla
from libmozdata.connection import Connection
from bugbot import logger, utils
from bugbot.components import ComponentName
from bugbot.crash import socorro_util
# TODO: Move this to libmozdata
def generate_signature_page_url(params: dict, tab: str) -> str:
    """Build the URL of a signature page on Socorro.

    Args:
        params: the parameters for the search query.
        tab: the page tab that should be selected.

    Returns:
        The URL of the signature page on Socorro
    """
    base = socorro.Socorro.CRASH_STATS_URL
    encoded_query = lmdutils.get_params_for_url(params)
    return "{}/signature/{}#{}".format(base, encoded_query, tab)
# NOTE: At this point, we will file bugs on bugzilla-dev. Once we are confident
# that the bug filing is working as expected, we can switch to filing bugs in
# the production instance of Bugzilla.
class DevBugzilla(Bugzilla):
    """Bugzilla client pointed at the bugzilla-dev staging instance."""

    # Same endpoint layout as the parent class, but on the staging host.
    URL = "https://bugzilla-dev.allizom.org"
    API_URL = URL + "/rest/bug"
    ATTACHMENT_API_URL = API_URL + "/attachment"
    # NOTE(review): evaluated at import time — presumably fails fast when
    # the "bz_api_key_dev" entry is missing from the login info; confirm.
    TOKEN = utils.get_login_info()["bz_api_key_dev"]
class NoCrashReportFoundError(Exception):
    """There are no crash reports that meet the required criteria.

    Raised by ``SignatureAnalyzer.fetch_representative_processed_crash``
    when no fetched report matches the most frequent proto signature.
    """
class ClouseauDataAnalyzer:
    """Analyze the data returned by Crash Clouseau"""

    # Reports/changesets scoring below this are not considered actionable.
    MINIMUM_CLOUSEAU_SCORE_THRESHOLD: int = 8
    DEFAULT_CRASH_COMPONENT = ComponentName("Core", "General")

    def __init__(self, reports: Iterable[dict]):
        self._clouseau_reports = reports

    @cached_property
    def max_clouseau_score(self):
        """The maximum Clouseau score in the crash reports."""
        if not self._clouseau_reports:
            return 0
        return max(report["max_score"] for report in self._clouseau_reports)

    def _iter_high_confidence_changesets(self) -> Iterator[dict]:
        """Yield the changesets that plausibly caused the crash.

        A changeset qualifies when both its report and the changeset itself
        score at least the accepted minimum, and the changeset is neither a
        merge nor backed out. (Shared by the bug-id and patch-hash
        properties, which previously duplicated this filter.)
        """
        minimum_accepted_score = max(
            self.MINIMUM_CLOUSEAU_SCORE_THRESHOLD, self.max_clouseau_score
        )
        for report in self._clouseau_reports:
            if report["max_score"] < minimum_accepted_score:
                continue
            for changeset in report["changesets"]:
                if (
                    changeset["max_score"] >= minimum_accepted_score
                    and not changeset["is_merge"]
                    and not changeset["is_backedout"]
                ):
                    yield changeset

    @cached_property
    def regressed_by_potential_bug_ids(self) -> set[int]:
        """The IDs for the bugs that their patches could have caused the crash."""
        return {
            changeset["bug_id"]
            for changeset in self._iter_high_confidence_changesets()
        }

    @cached_property
    def regressed_by_patch(self) -> str | None:
        """The hash of the patch that could have caused the crash.

        If there is more than one candidate patch, the value will be `None`.
        """
        potential_patches = {
            changeset["changeset"]
            for changeset in self._iter_high_confidence_changesets()
        }
        if len(potential_patches) == 1:
            return next(iter(potential_patches))
        return None

    @cached_property
    def regressed_by(self) -> int | None:
        """The ID of the bug that one of its patches could have caused
        the crash.

        If there are multiple bugs, the value will be `None`.
        """
        bug_ids = self.regressed_by_potential_bug_ids
        if len(bug_ids) == 1:
            return next(iter(bug_ids))
        return None

    @cached_property
    def regressed_by_potential_bugs(self) -> list[dict]:
        """The bugs whose patches could have caused the crash."""

        def handler(bug: dict, data: list):
            data.append(bug)

        bugs: list[dict] = []
        Bugzilla(
            bugids=self.regressed_by_potential_bug_ids,
            include_fields=[
                "id",
                "assigned_to",
                "product",
                "component",
            ],
            bughandler=handler,
            bugdata=bugs,
        ).wait()
        return bugs

    @cached_property
    def regressed_by_author(self) -> dict | None:
        """The author of the patch that could have caused the crash.

        If there are multiple regressors, the value will be `None`.

        The regressor bug assignee is considered as the author, even if the
        assignee is not the patch author.
        """
        if not self.regressed_by:
            return None
        # With a single regressor, the fetched list holds exactly that bug.
        bug = self.regressed_by_potential_bugs[0]
        assert bug["id"] == self.regressed_by
        return bug["assigned_to_detail"]

    @cached_property
    def crash_component(self) -> ComponentName:
        """The component that the crash belongs to.

        If there are multiple components, the value will be the default one.
        """
        potential_components = {
            ComponentName(bug["product"], bug["component"])
            for bug in self.regressed_by_potential_bugs
        }
        if len(potential_components) == 1:
            return next(iter(potential_components))
        return self.DEFAULT_CRASH_COMPONENT
class SocorroDataAnalyzer(socorro_util.SignatureStats):
    """Analyze the data returned by Socorro."""

    # Lazily-populated, class-wide caches of the legal values for the
    # corresponding Bugzilla fields (fetched once per process).
    _bugzilla_os_legal_values = None
    _bugzilla_cpu_legal_values_map = None
    # Platforms handed to SignatureStats; short names match the first three
    # lowercased characters of the Socorro platform facet terms.
    _platforms = [
        {"short_name": "win", "name": "Windows"},
        {"short_name": "mac", "name": "Mac OS X"},
        {"short_name": "lin", "name": "Linux"},
        {"short_name": "and", "name": "Android"},
        {"short_name": "unknown", "name": "Unknown"},
    ]

    def __init__(
        self,
        signature: dict,
        num_total_crashes: int,
    ):
        super().__init__(signature, num_total_crashes, platforms=self._platforms)

    @classmethod
    def to_bugzilla_op_sys(cls, op_sys: str) -> str:
        """Return the corresponding OS name in Bugzilla for the provided OS name
        from Socorro.

        If the OS name is not recognized, return "Other".
        """
        if cls._bugzilla_os_legal_values is None:
            cls._bugzilla_os_legal_values = set(
                bugzilla.BugFields.fetch_field_values("op_sys")
            )

        if op_sys in cls._bugzilla_os_legal_values:
            return op_sys

        # Not an exact legal value: normalize to the OS family name.
        if op_sys.startswith("OS X ") or op_sys.startswith("macOS "):
            op_sys = "macOS"
        elif op_sys.startswith("Windows"):
            op_sys = "Windows"
        elif "Linux" in op_sys or op_sys.startswith("Ubuntu"):
            op_sys = "Linux"
        else:
            op_sys = "Other"

        return op_sys

    @property
    def bugzilla_op_sys(self) -> str:
        """The name of the OS where the crash happens.

        The value is one of the legal values for Bugzilla's `op_sys` field.

        - If no OS name is found, the value will be "Unspecified".
        - If the OS name is not recognized, the value will be "Other".
        - If multiple OS names are found, the value will be "All". Unless the OS
          names can be resolved to a common name without a version. For example,
          "Windows 10" and "Windows 7" will become "Windows".
        """
        all_op_sys = {
            self.to_bugzilla_op_sys(op_sys["term"])
            for op_sys in self.signature["facets"]["platform_pretty_version"]
        }

        if len(all_op_sys) > 1:
            # Resolve to root OS name by removing the version number.
            all_op_sys = {op_sys.split(" ")[0] for op_sys in all_op_sys}

        if len(all_op_sys) == 2 and "Other" in all_op_sys:
            # TODO: explain this workaround.
            all_op_sys.remove("Other")

        if len(all_op_sys) == 1:
            return next(iter(all_op_sys))

        if len(all_op_sys) == 0:
            return "Unspecified"

        return "All"

    @classmethod
    def to_bugzilla_cpu(cls, cpu: str) -> str:
        """Return the corresponding CPU name in Bugzilla for the provided name
        from Socorro.

        If the CPU is not recognized, return "Other".
        """
        if cls._bugzilla_cpu_legal_values_map is None:
            cls._bugzilla_cpu_legal_values_map = {
                value.lower(): value
                for value in bugzilla.BugFields.fetch_field_values("rep_platform")
            }

        return cls._bugzilla_cpu_legal_values_map.get(cpu, "Other")

    @property
    def bugzilla_cpu_arch(self) -> str:
        """The CPU architecture of the devices where the crash happens.

        The value is one of the legal values for Bugzilla's `rep_platform` field.

        - If no CPU architecture is found, the value will be "Unspecified".
        - If the CPU architecture is not recognized, the value will be "Other".
        - If multiple CPU architectures are found, the value will "All".
        """
        all_cpu_arch = {
            self.to_bugzilla_cpu(cpu["term"])
            for cpu in self.signature["facets"]["cpu_arch"]
        }

        if len(all_cpu_arch) == 2 and "Other" in all_cpu_arch:
            all_cpu_arch.remove("Other")

        if len(all_cpu_arch) == 1:
            return next(iter(all_cpu_arch))

        if len(all_cpu_arch) == 0:
            return "Unspecified"

        return "All"

    @property
    def user_comments_page_url(self) -> str:
        """The URL to the Signature page on Socorro where the Comments tab is
        selected.
        """
        # Cover roughly the last six months of reports.
        start_date = date.today() - timedelta(weeks=26)
        params = {
            "signature": self.signature_term,
            "date": socorro.SuperSearch.get_search_date(start_date),
        }
        return generate_signature_page_url(params, "comments")

    @property
    def num_user_comments(self) -> int:
        """The number of crash reports with user comments."""
        # TODO: count useful/interesting user comments (e.g., exclude one word comments)
        return self.signature["facets"]["cardinality_user_comments"]["value"]

    @property
    def has_user_comments(self) -> bool:
        """Whether the crash signature has any reports with a user comment."""
        return self.num_user_comments > 0

    @property
    def top_proto_signature(self) -> str:
        """The proto signature that occurs the most."""
        return self.signature["facets"]["proto_signature"][0]["term"]

    @property
    def num_top_proto_signature_crashes(self) -> int:
        """The number of crashes for the most occurring proto signature."""
        return self.signature["facets"]["proto_signature"][0]["count"]

    def _build_ids(self) -> Iterator[int]:
        """Yields the build IDs where the crash occurred."""
        for build_id in self.signature["facets"]["build_id"]:
            yield build_id["term"]

    @property
    def top_build_id(self) -> int:
        """The build ID where most crashes occurred."""
        # Assumes the build_id facet is ordered by count — TODO confirm
        # against the SuperSearch API.
        return self.signature["facets"]["build_id"][0]["term"]
class SignatureAnalyzer(SocorroDataAnalyzer, ClouseauDataAnalyzer):
    """Analyze the data related to a signature.

    This includes data from Socorro and Clouseau.
    """

    def __init__(
        self,
        socorro_signature: dict,
        num_total_crashes: int,
        clouseau_reports: list[dict],
    ):
        SocorroDataAnalyzer.__init__(self, socorro_signature, num_total_crashes)
        ClouseauDataAnalyzer.__init__(self, clouseau_reports)

    def _fetch_crash_reports(
        self,
        proto_signature: str,
        build_id: int | Iterable[int],
        limit: int = 1,
    ) -> Iterator[dict]:
        """Fetch crash report hits matching the proto signature and build(s).

        Args:
            proto_signature: the proto signature to match exactly.
            build_id: a single build ID, or an iterable of build IDs.
            limit: the maximum number of results to request.

        Yields:
            SuperSearch hits; each hit carries the crash report "uuid".
        """
        params = {
            "proto_signature": "=" + proto_signature,
            "build_id": build_id,
            "_columns": [
                "uuid",
            ],
            "_results_number": limit,
        }

        def handler(res: dict, data: dict):
            data.update(res)

        data: dict = {}
        socorro.SuperSearch(params=params, handler=handler, handlerdata=data).wait()

        yield from data["hits"]

    def fetch_representative_processed_crash(self) -> dict:
        """Fetch a processed crash to represent the signature.

        This could fetch multiple processed crashes and return the one that is
        most likely to be useful.

        Raises:
            NoCrashReportFoundError: when no candidate report matches the most
                frequent proto signature (and matching is required).
        """
        # Only insist on the top proto signature when it clearly dominates
        # the signature's crash volume (> 60%).
        limit_to_top_proto_signature = (
            self.num_top_proto_signature_crashes / self.num_crashes > 0.6
        )

        reports = itertools.chain(
            # Reports with a higher score from clouseau are more likely to be
            # useful.
            sorted(
                self._clouseau_reports,
                key=lambda report: report["max_score"],
                reverse=True,
            ),
            # Next we try find reports from the top crashing build because they
            # are likely to be representative.
            self._fetch_crash_reports(self.top_proto_signature, self.top_build_id),
            self._fetch_crash_reports(self.top_proto_signature, self._build_ids()),
        )
        for report in reports:
            uuid = report["uuid"]
            processed_crash = socorro.ProcessedCrash.get_processed(uuid)[uuid]
            if (
                not limit_to_top_proto_signature
                or processed_crash["proto_signature"] == self.top_proto_signature
            ):
                # TODO(investigate): maybe we should check if the stack is
                # corrupted (ask gsvelto or willkg about how to detect that)
                return processed_crash

        raise NoCrashReportFoundError(
            f"No crash report found with the most frequent proto signature for {self.signature_term}."
        )
class SignaturesDataFetcher:
"""Fetch the data related to the given signatures."""
MEMORY_ACCESS_ERROR_REASONS = (
# On Windows:
"EXCEPTION_ACCESS_VIOLATION_READ",
"EXCEPTION_ACCESS_VIOLATION_WRITE",
"EXCEPTION_ACCESS_VIOLATION_EXEC"
# On Linux:
"SIGSEGV / SEGV_MAPERR",
"SIGSEGV / SEGV_ACCERR",
)
EXCLUDED_MOZ_REASON_STRINGS = (
"MOZ_CRASH(OOM)",
"MOZ_CRASH(Out of memory)",
"out of memory",
"Shutdown hanging",
# TODO(investigate): do we need to exclude signatures that their reason
# contains `[unhandlable oom]`?
# Example: arena_t::InitChunk | arena_t::AllocRun | arena_t::MallocLarge | arena_t::Malloc | BaseAllocator::malloc | Allocator::malloc | PageMalloc
# "[unhandlable oom]",
)
# If any of the crash reason starts with any of the following, then it is
# Network or I/O error.
EXCLUDED_IO_ERROR_REASON_PREFIXES = (
"EXCEPTION_IN_PAGE_ERROR_READ",
"EXCEPTION_IN_PAGE_ERROR_WRITE",
"EXCEPTION_IN_PAGE_ERROR_EXEC",
)
# TODO(investigate): do we need to exclude all these signatures prefixes?
EXCLUDED_SIGNATURE_PREFIXES = (
"OOM | ",
"bad hardware | ",
"shutdownhang | ",
)
def __init__(
self,
signatures: Iterable[str],
product: str = "Firefox",
channel: str = "nightly",
):
self._signatures = set(signatures)
self._product = product
self._channel = channel
@classmethod
def find_new_actionable_crashes(
cls,
product: str,
channel: str,
days_to_check: int = 7,
days_without_crashes: int = 7,
) -> "SignaturesDataFetcher":
"""Find new actionable crashes.
Args:
product: The product to check.
channel: The release channel to check.
days_to_check: The number of days to check for crashes.
days_without_crashes: The number of days without crashes before the
`days_to_check` to consider the signature new.
Returns:
A list of actionable signatures.
"""
duration = days_to_check + days_without_crashes
end_date = lmdutils.get_date_ymd("today")
start_date = end_date - timedelta(duration)
earliest_allowed_date = lmdutils.get_date_str(
end_date - timedelta(days_to_check)
)
date_range = socorro.SuperSearch.get_search_date(start_date, end_date)
params = {
"product": product,
"release_channel": channel,
"date": date_range,
# TODO(investigate): should we do a local filter instead of the
# following (should we exclude the signature if one of the crashes
# is a shutdown hang?):
# If the `ipc_shutdown_state` or `shutdown_progress` field are
# non-empty then it's a shutdown hang.
"ipc_shutdown_state": "__null__",
"shutdown_progress": "__null__",
# TODO(investigate): should we use the following instead of the
# local filter.
# "oom_allocation_size": "!__null__",
"_aggs.signature": [
"moz_crash_reason",
"reason",
"_histogram.date",
"_cardinality.install_time",
"_cardinality.oom_allocation_size",
],
"_results_number": 0,
"_facets_size": 10000,
}
def handler(search_resp: dict, data: list):
logger.debug(
"Total of %d signatures received from Socorro",
len(search_resp["facets"]["signature"]),
)
for crash in search_resp["facets"]["signature"]:
signature = crash["term"]
if any(
signature.startswith(excluded_prefix)
for excluded_prefix in cls.EXCLUDED_SIGNATURE_PREFIXES
):
# Ignore signatures that start with any of the excluded prefixes.
continue
facets = crash["facets"]
installations = facets["cardinality_install_time"]["value"]
if installations <= 1:
# Ignore crashes that only happen on one installation.
continue
first_date = facets["histogram_date"][0]["term"]
if first_date < earliest_allowed_date:
# The crash is not new, skip it.
continue
if any(
reason["term"].startswith(io_error_prefix)
for reason in facets["reason"]
for io_error_prefix in cls.EXCLUDED_IO_ERROR_REASON_PREFIXES
):
# Ignore Network or I/O error crashes.
continue
if crash["count"] < 20:
# For signatures with low volume, having multiple types of
# memory errors indicates potential bad hardware crashes.
num_memory_error_types = sum(
reason["term"] in cls.MEMORY_ACCESS_ERROR_REASONS
for reason in facets["reason"]
)
if num_memory_error_types > 1:
# Potential bad hardware crash, skip it.
continue
# TODO: Add a filter using the `possible_bit_flips_max_confidence`
# field to exclude bad hardware crashes. The filed is not available yet.
# See: https://bugzilla.mozilla.org/show_bug.cgi?id=1816669#c3
# TODO(investigate): is this needed since we are already
# filtering signatures that start with "OOM | "
if facets["cardinality_oom_allocation_size"]["value"]:
# If one of the crashes is an OOM crash, skip it.
continue
# TODO(investigate): do we need to check for the `moz_crash_reason`
moz_crash_reasons = facets["moz_crash_reason"]
if moz_crash_reasons and any(
excluded_reason in reason["term"]
for reason in moz_crash_reasons
for excluded_reason in cls.EXCLUDED_MOZ_REASON_STRINGS
):
continue
data.append(signature)
signatures: list = []
socorro.SuperSearch(
params=params,
handler=handler,
handlerdata=signatures,
).wait()
logger.debug(
"Total of %d signatures left after applying the filtering criteria",
len(signatures),
)
return cls(signatures, product, channel)
def fetch_clouseau_crash_reports(self) -> dict[str, list]:
"""Fetch the crash reports data from Crash Clouseau."""
signature_reports = clouseau.Reports.get_by_signatures(
self._signatures,
product=self._product,
channel=self._channel,
)
logger.debug(
"Total of %d signatures received from Clouseau", len(signature_reports)
)
return signature_reports
def fetch_socorro_info(self) -> tuple[list[dict], int]:
"""Fetch the signature data from Socorro."""
# TODO(investigate): should we increase the duration to 6 months?
duration = timedelta(weeks=1)
end_date = lmdutils.get_date_ymd("today")
start_date = end_date - duration
date_range = socorro.SuperSearch.get_search_date(start_date, end_date)
params = {
"product": self._product,
# TODO(investigate): should we included all release channels?
"release_channel": self._channel,
# TODO(investigate): should we limit based on the build date as well?
"date": date_range,
# TODO: split signatures into chunks to avoid very long query URLs
"signature": ["=" + signature for signature in self._signatures],
"_aggs.signature": [
"build_id",
"cpu_arch",
"proto_signature",
"_cardinality.user_comments",
"cpu_arch",
"platform_pretty_version",
# The following are needed for SignatureStats:
"platform",
"is_garbage_collecting",
"_cardinality.install_time",
"startup_crash",
"_histogram.uptime",
"process_type",
],
"_results_number": 0,
"_facets_size": 10000,
}
def handler(search_results: dict, data: dict):
data["num_total_crashes"] = search_results["total"]
data["signatures"] = search_results["facets"]["signature"]
data: dict = {}
socorro.SuperSearchUnredacted(
params=params,
handler=handler,
handlerdata=data,
).wait()
logger.debug(
"Fetch info from Socorro for %d signatures", len(data["signatures"])
)
return data["signatures"], data["num_total_crashes"]
def fetch_bugs(self, include_fields: list[str] = None) -> dict[str, list[dict]]:
"""Fetch bugs that are filed against the given signatures."""
params_base: dict = {
"include_fields": [
"cf_crash_signature",
],
}
if include_fields:
params_base["include_fields"].extend(include_fields)
params_list = []
for signatures_chunk in Connection.chunks(list(self._signatures), 30):
params = params_base.copy()
n = int(utils.get_last_field_num(params))
params[f"f{n}"] = "OP"
params[f"j{n}"] = "OR"
for signature in signatures_chunk:
n += 1
params[f"f{n}"] = "cf_crash_signature"
params[f"o{n}"] = "regexp"
params[f"v{n}"] = rf"\[(@ |@){re.escape(signature)}( \]|\])"
params[f"f{n+1}"] = "CP"
params_list.append(params)
signatures_bugs: dict = defaultdict(list)
def handler(res, data):
for bug in res["bugs"]:
for signature in utils.get_signatures(bug["cf_crash_signature"]):
if signature in self._signatures:
data[signature].append(bug)
Bugzilla(
queries=[
connection.Query(Bugzilla.API_URL, params, handler, signatures_bugs)
for params in params_list
],
).wait()
# TODO: remove the call to DevBugzilla after moving to production
DevBugzilla(
queries=[
connection.Query(DevBugzilla.API_URL, params, handler, signatures_bugs)
for params in params_list
],
).wait()
logger.debug(
"Total of %d signatures already have bugs filed", len(signatures_bugs)
)
return signatures_bugs
def analyze(self) -> list[SignatureAnalyzer]:
"""Analyze the data related to the signatures."""
bugs = self.fetch_bugs()
# TODO(investigate): For now, we are ignoring signatures that have bugs
# filed even if they are closed long time ago. We should investigate
# whether we should include the ones with closed bugs. For example, if
# the bug was closed as Fixed years ago.
self._signatures.difference_update(bugs.keys())
clouseau_reports = self.fetch_clouseau_crash_reports()
# TODO(investigate): For now, we are ignoring signatures that are not
# analyzed by clouseau. We should investigate why they are not analyzed
# and whether we should include them.
self._signatures.intersection_update(clouseau_reports.keys())
signatures, num_total_crashes = self.fetch_socorro_info()
logger.debug("Total of %d signatures will be analyzed", len(signatures))
return [
SignatureAnalyzer(
signature,
num_total_crashes,
clouseau_reports[signature["term"]],
)
for signature in signatures
]

Просмотреть файл

@@ -0,0 +1,399 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
"""The code in this module was borrowed from Socorro (some parts were adjusted).
Each function, class, or dictionary is documented with a link to the original
source.
"""
import re
from functools import cached_property
from itertools import islice
# Original Socorro code: https://github.com/mozilla-services/socorro/blob/ff8f5d6b41689e34a6b800577d8ffe383e1e62eb/webapp/crashstats/crashstats/templatetags/jinja_helpers.py#L182-L203
def generate_bug_description_data(report) -> dict:
    """Extract the crash-report fields needed to draft a Bugzilla bug.

    Args:
        report: a processed (redacted) crash report dict.

    Returns:
        A dict with the report UUID, the redacted crash reasons, the
        crashing thread index, and the formatted frames of the crashing
        thread (``frames`` is None when no stack data is available).
    """
    crashing_thread = get_crashing_thread(report)
    parsed_dump = get_parsed_dump(report) or {}

    frames = None
    threads = parsed_dump.get("threads")
    if threads:
        # Fall back to thread 0 when the crashing thread is unknown.
        thread_index = crashing_thread or 0
        frames = bugzilla_thread_frames(parsed_dump["threads"][thread_index])

    return {
        "uuid": report["uuid"],
        # NOTE(willkg): this is the redacted stack trace--not the raw one that can
        # have PII in it
        "java_stack_trace": report.get("java_stack_trace", None),
        # NOTE(willkg): this is the redacted mozcrashreason--not the raw one that
        # can have PII in it
        "moz_crash_reason": report.get("moz_crash_reason", None),
        "reason": report.get("reason", None),
        "frames": frames,
        "crashing_thread": crashing_thread,
    }
# Original Socorro code: https://github.com/mozilla-services/socorro/blob/ff8f5d6b41689e34a6b800577d8ffe383e1e62eb/webapp/crashstats/crashstats/templatetags/jinja_helpers.py#L227-L278
def bugzilla_thread_frames(thread):
    """Build frame information for bug creation link

    Extract frame info for the top frames of a crashing thread to be included in the
    Bugzilla summary when reporting the crash.

    :arg thread: dict of thread information including "frames" list

    :returns: list of frame information dicts
    """
    # We only want to include 10 frames in the link
    MAX_FRAMES = 10

    def expanded_frames(source_thread):
        """Yield frames in a thread factoring in inlines"""
        for outer in source_thread["frames"]:
            for inline in outer.get("inlines") or []:
                # Inlined calls borrow the containing frame's index and
                # module, but carry their own function/file/line.
                yield {
                    "frame": outer.get("frame", "?"),
                    "module": outer.get("module", ""),
                    "signature": inline["function"],
                    "file": inline["file"],
                    "line": inline["line"],
                }
            yield outer

    results = []
    for entry in islice(expanded_frames(thread), MAX_FRAMES):
        # Source is an empty string if data isn't available
        location = entry.get("file") or ""
        if entry.get("line"):
            location += ":{}".format(entry["line"])

        name = entry.get("signature") or ""
        # Remove function arguments
        if not name.startswith("(unloaded"):
            name = re.sub(r"\(.*\)", "", name)

        results.append(
            {
                "frame": entry.get("frame", "?"),
                "module": entry.get("module") or "?",
                "signature": name,
                "source": location,
            }
        )

    return results
# Original Socorro code: https://github.com/mozilla-services/socorro/blob/ff8f5d6b41689e34a6b800577d8ffe383e1e62eb/webapp/crashstats/crashstats/utils.py#L343-L359
def enhance_json_dump(dump, vcs_mappings):
    """
    Add some information to the stackwalker's json_dump output
    for display. Mostly applying vcs_mappings to stack frames.

    Mutates `dump` in place and also returns it.
    """
    for thread_index, thread in enumerate(dump.get("threads", [])):
        if "thread" not in thread:
            # Record the thread's own index on the thread dict.
            thread["thread"] = thread_index

        frames = thread["frames"]
        for frame in frames:
            enhance_frame(frame, vcs_mappings)
            # Inlined functions carry their own file/line info and are
            # enhanced individually.
            for inline in frame.get("inlines") or []:
                enhance_frame(inline, vcs_mappings)

        thread["frames"] = frames
    return dump
# https://github.com/mozilla-services/socorro/blob/ff8f5d6b41689e34a6b800577d8ffe383e1e62eb/webapp/crashstats/crashstats/utils.py#L259-L340
def enhance_frame(frame, vcs_mappings):
    """Add additional info to a stack frame

    This adds signature and source links from vcs_mappings.

    Mutates `frame` in place: sets "signature" and "short_signature", and may
    rewrite "file" and add "source_link".
    """
    # If this is a truncation frame, then we don't need to enhance it in any way
    if frame.get("truncated") is not None:
        return

    # Pick the best available signature, in decreasing order of information:
    # function name > file#line > module@offset > unloaded module > raw offset.
    if frame.get("function"):
        # Remove spaces before all stars, ampersands, and commas
        function = re.sub(r" (?=[\*&,])", "", frame["function"])
        # Ensure a space after commas
        function = re.sub(r",(?! )", ", ", function)
        frame["function"] = function
        signature = function
    elif frame.get("file") and frame.get("line"):
        signature = "%s#%d" % (frame["file"], frame["line"])
    elif frame.get("module") and frame.get("module_offset"):
        signature = "%s@%s" % (
            frame["module"],
            strip_leading_zeros(frame["module_offset"]),
        )
    elif frame.get("unloaded_modules"):
        first_module = frame["unloaded_modules"][0]
        if first_module.get("offsets"):
            signature = "(unloaded %s@%s)" % (
                first_module.get("module") or "",
                strip_leading_zeros(first_module.get("offsets")[0]),
            )
        else:
            signature = "(unloaded %s)" % first_module
    else:
        signature = "@%s" % frame["offset"]

    frame["signature"] = signature
    if signature.startswith("(unloaded"):
        # If the signature is based on an unloaded module, leave the string as is
        frame["short_signature"] = signature
    else:
        # Remove arguments which are enclosed in parens
        frame["short_signature"] = re.sub(r"\(.*\)", "", signature)

    if frame.get("file"):
        # A VCS-annotated file looks like "vcstype:root:path:revision".
        vcsinfo = frame["file"].split(":")
        if len(vcsinfo) == 4:
            vcstype, root, vcs_source_file, revision = vcsinfo
            if "/" in root:
                # The root is something like 'hg.mozilla.org/mozilla-central'
                server, repo = root.split("/", 1)
            else:
                # E.g. 'gecko-generated-sources' or something without a '/'
                repo = server = root

            if (
                vcs_source_file.count("/") > 1
                and len(vcs_source_file.split("/")[0]) == 128
            ):
                # In this case, the 'vcs_source_file' will be something like
                # '{SHA-512 hex}/ipc/ipdl/PCompositorBridgeChild.cpp'
                # So drop the sha part for the sake of the 'file' because
                # we don't want to display a 128 character hex code in the
                # hyperlink text.
                vcs_source_file_display = "/".join(vcs_source_file.split("/")[1:])
            else:
                # Leave it as is if it's not unwieldy long.
                vcs_source_file_display = vcs_source_file

            if vcstype in vcs_mappings:
                if server in vcs_mappings[vcstype]:
                    link = vcs_mappings[vcstype][server]
                    frame["file"] = vcs_source_file_display
                    frame["source_link"] = link % {
                        "repo": repo,
                        "file": vcs_source_file,
                        "revision": revision,
                        "line": frame["line"],
                    }
            else:
                # Unknown VCS type: keep just the base filename for display.
                path_parts = vcs_source_file.split("/")
                frame["file"] = path_parts.pop()
# Original Socorro code: https://github.com/mozilla-services/socorro/blob/ff8f5d6b41689e34a6b800577d8ffe383e1e62eb/socorro/signature/utils.py#L405-L422
def strip_leading_zeros(text):
    """Strip leading zeros from a hex string.

    Example::

        >>> strip_leading_zeros("0x0000000000032ec0")
        '0x32ec0'

    :param text: the text to strip leading zeros from

    :returns: stripped text
    """
    try:
        value = int(text, base=16)
    except (ValueError, TypeError):
        # Not a hex string (or not a string at all): return it unchanged.
        return text
    return hex(value)
# Original Socorro code: https://github.com/mozilla-services/socorro/blob/ff8f5d6b41689e34a6b800577d8ffe383e1e62eb/webapp/crashstats/settings/base.py#L268-L293
# Link to source if possible
# Maps VCS type -> hosting server -> URL template; used by `enhance_frame`
# to build "source_link" values (placeholders: repo, file, revision, line).
VCS_MAPPINGS = {
    "cvs": {
        "cvs.mozilla.org": (
            "http://bonsai.mozilla.org/cvsblame.cgi?file=%(file)s&rev=%(revision)s&mark=%(line)s#%(line)s"
        )
    },
    "hg": {
        "hg.mozilla.org": (
            "https://hg.mozilla.org/%(repo)s/file/%(revision)s/%(file)s#l%(line)s"
        )
    },
    "git": {
        "git.mozilla.org": (
            "http://git.mozilla.org/?p=%(repo)s;a=blob;f=%(file)s;h=%(revision)s#l%(line)s"
        ),
        "github.com": (
            "https://github.com/%(repo)s/blob/%(revision)s/%(file)s#L%(line)s"
        ),
    },
    "s3": {
        "gecko-generated-sources": (
            "/sources/highlight/?url=https://gecko-generated-sources.s3.amazonaws.com/%(file)s&line=%(line)s#L-%(line)s"
        )
    },
}
# Original Socorro code: https://github.com/mozilla-services/socorro/blob/ff8f5d6b41689e34a6b800577d8ffe383e1e62eb/webapp/crashstats/crashstats/views.py#L141-L153
def get_parsed_dump(report):
    """Return the report's enhanced `json_dump`, or an empty dict.

    Only reports that carry a `json_dump` are enhanced; others yield {}.
    """
    # For C++/Rust crashes
    if "json_dump" in report:
        json_dump = report["json_dump"]

        # This is for displaying on the "Details" tab
        enhance_json_dump(json_dump, VCS_MAPPINGS)
        parsed_dump = json_dump
    else:
        parsed_dump = {}
    return parsed_dump
# Original Socorro code: https://github.com/mozilla-services/socorro/blob/ff8f5d6b41689e34a6b800577d8ffe383e1e62eb/webapp/crashstats/crashstats/views.py#L155-L160
def get_crashing_thread(report):
    """Return the index of the thread to treat as the crashing thread.

    For shutdownhang signatures, we want to use thread 0 as the crashing
    thread, because that's the thread that actually contains the useful
    data about what happened.
    """
    if not report["signature"].startswith("shutdownhang"):
        return report.get("crashing_thread")
    return 0
# Original Socorro code: https://github.com/mozilla-services/socorro/blob/ff8f5d6b41689e34a6b800577d8ffe383e1e62eb/webapp/crashstats/crashstats/utils.py#L73-L195
class SignatureStats:
    """Statistics for one signature aggregation returned by SuperSearch.

    `signature` is a single entry from the `facets["signature"]` list and
    `num_total_crashes` is the total crash count over the search period.
    """

    def __init__(
        self,
        signature,
        num_total_crashes,
        rank=0,
        platforms=None,
        previous_signature=None,
    ):
        self.signature = signature
        self.num_total_crashes = num_total_crashes
        self.rank = rank
        self.platforms = platforms
        self.previous_signature = previous_signature

    @cached_property
    def platform_codes(self):
        # Short platform codes (e.g. "win", "mac"), excluding "unknown".
        return [x["short_name"] for x in self.platforms if x["short_name"] != "unknown"]

    @cached_property
    def signature_term(self):
        # The signature string itself.
        return self.signature["term"]

    @cached_property
    def percent_of_total_crashes(self):
        return 100.0 * self.signature["count"] / self.num_total_crashes

    @cached_property
    def num_crashes(self):
        return self.signature["count"]

    @cached_property
    def num_crashes_per_platform(self):
        # Map "<code>_count" -> crash count; platforms are matched by the
        # first three lowercased characters of the Socorro platform term.
        num_crashes_per_platform = {
            platform + "_count": 0 for platform in self.platform_codes
        }
        for platform in self.signature["facets"]["platform"]:
            code = platform["term"][:3].lower()
            if code in self.platform_codes:
                num_crashes_per_platform[code + "_count"] = platform["count"]
        return num_crashes_per_platform

    @cached_property
    def num_crashes_in_garbage_collection(self):
        # A facet term of "t" marks crashes during garbage collection.
        num_crashes_in_garbage_collection = 0
        for row in self.signature["facets"]["is_garbage_collecting"]:
            if row["term"].lower() == "t":
                num_crashes_in_garbage_collection = row["count"]
        return num_crashes_in_garbage_collection

    @cached_property
    def num_installs(self):
        return self.signature["facets"]["cardinality_install_time"]["value"]

    @cached_property
    def percent_of_total_crashes_diff(self):
        if self.previous_signature:
            # The number should go "up" when moving towards 100 and "down" when moving
            # towards 0
            return (
                self.percent_of_total_crashes
                - self.previous_signature.percent_of_total_crashes
            )
        # No previous period to compare against.
        return "new"

    @cached_property
    def rank_diff(self):
        if self.previous_signature:
            # The number should go "up" when moving towards 1 and "down" when moving
            # towards infinity
            return self.previous_signature.rank - self.rank
        return 0

    @cached_property
    def previous_percent_of_total_crashes(self):
        if self.previous_signature:
            return self.previous_signature.percent_of_total_crashes
        return 0

    @cached_property
    def num_startup_crashes(self):
        # Socorro reports startup_crash truthiness as "T" or "1".
        return sum(
            row["count"]
            for row in self.signature["facets"]["startup_crash"]
            if row["term"] in ("T", "1")
        )

    @cached_property
    def is_startup_crash(self):
        # Every crash happened during startup.
        return self.num_startup_crashes == self.num_crashes

    @cached_property
    def is_potential_startup_crash(self):
        # Some, but not all, crashes happened during startup.
        return (
            self.num_startup_crashes > 0 and self.num_startup_crashes < self.num_crashes
        )

    @cached_property
    def is_startup_window_crash(self):
        # True when more than half of the crashes happened within the first
        # minute of uptime.
        is_startup_window_crash = False
        for row in self.signature["facets"]["histogram_uptime"]:
            # Aggregation buckets use the lowest value of the bucket as
            # term. So for everything between 0 and 60 excluded, the
            # term will be `0`.
            if row["term"] < 60:
                ratio = 1.0 * row["count"] / self.num_crashes
                is_startup_window_crash = ratio > 0.5
        return is_startup_window_crash

    @cached_property
    def is_plugin_crash(self):
        for row in self.signature["facets"]["process_type"]:
            if row["term"].lower() == "plugin":
                return row["count"] > 0
        return False

    @cached_property
    def is_startup_related_crash(self):
        return (
            self.is_startup_crash
            or self.is_potential_startup_crash
            or self.is_startup_window_crash
        )

Просмотреть файл

@ -0,0 +1,160 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
import pprint
import jinja2
import requests
from bugbot import logger
from bugbot.bzcleaner import BzCleaner
from bugbot.crash import socorro_util
from bugbot.crash.analyzer import DevBugzilla, SignaturesDataFetcher
class FileCrashBug(BzCleaner):
    """File bugs for new actionable crashes.

    Pulls newly appeared, actionable crash signatures via
    ``SignaturesDataFetcher``, renders a bug description from a Jinja
    template, and files one bug per signature. While the rule is in its
    experimental phase, bugs are filed on the bugzilla-dev instance.
    """

    # NOTE: If you make changes that affect the output of the rule, you should
    # increment this number. This is needed in the experimental phase only.
    VERSION = 1
    # Titles longer than this are truncated with a trailing "...".
    MAX_BUG_TITLE_LENGTH = 255

    def __init__(self):
        super().__init__()
        # Template used for the description (first comment) of the filed bug.
        self.bug_description_template = jinja2.Environment(
            loader=jinja2.FileSystemLoader("templates")
        ).get_template("file_crash_bug_description.md.jinja")

    def description(self):
        """One-line description used in the rule's report email."""
        return "New actionable crashes"

    def columns(self):
        """Columns rendered in the report email table."""
        return ["component", "id", "summary"]

    def get_bz_params(self, date):
        # NOTE(review): these search parameters (feature+regression keywords)
        # look unrelated to crash bugs; presumably a placeholder query since
        # get_bugs() below builds its result from Socorro data instead —
        # confirm intent.
        return {
            "resolution": ["---", "FIXED"],
            "keywords": ["feature", "regression"],
            "keywords_type": "allwords",
        }

    def get_bugs(self, date):
        """File one bug per new actionable crash signature.

        Returns:
            dict mapping the created bug id (str) to the row shown in the
            report email (id, summary, component).
        """
        self.query_url = None
        bugs = {}

        signatures = SignaturesDataFetcher.find_new_actionable_crashes(
            "Firefox", "nightly"
        )

        for signature in signatures.analyze():
            logger.debug("Generating bug for signature: %s", signature.signature_term)

            title = (
                f"Startup crash in [@ {signature.signature_term}]"
                if signature.is_startup_related_crash
                else f"Crash in [@ {signature.signature_term}]"
            )
            if len(title) > self.MAX_BUG_TITLE_LENGTH:
                # Keep within Bugzilla's summary limit.
                title = title[: self.MAX_BUG_TITLE_LENGTH - 3] + "..."

            # TODO: Handle cases where the regressor is a security bug. In such
            # cases, we may want to file the bug as security bug.
            flags = None
            if signature.regressed_by:
                # TODO: check user activity and if the ni? is open
                # Ask the author of the suspected regressor to take a look.
                flags = [
                    {
                        "name": "needinfo",
                        "requestee": signature.regressed_by_author["name"],
                        "status": "?",
                        "new": "true",
                    }
                ]

            report = signature.fetch_representative_processed_crash()
            description = self.bug_description_template.render(
                {
                    **socorro_util.generate_bug_description_data(report),
                    "signature": signature,
                    "needinfo_regression_author": bool(flags),
                }
            )

            # TODO: Provide the following information:
            # [X] Crash signature
            # [X] Top 10 frames of crashing thread
            # [X] Component
            # [X] The kind of crash
            # [ ] Regression window
            # [X] Inducing patch
            # [X] Reason
            # [X] Regressed by
            # [X] Platform
            # [ ] Firefox status flags
            # [ ] Severity
            # [ ] Time correlation
            # [X] User comments
            # [ ] Crash address commonalities
            # [ ] Estimated future crash volume

            bug_data = {
                "blocks": "bugbot-auto-crash",
                "type": "defect",
                "keywords": ["crash"],
                "status_whiteboard": f"[bugbot-crash-v{self.VERSION}]",
                "summary": title,
                "product": signature.crash_component.product,
                "component": signature.crash_component.name,
                "op_sys": signature.bugzilla_op_sys,
                "rep_platform": signature.bugzilla_cpu_arch,
                "cf_crash_signature": f"[@ {signature.signature_term}]",
                "description": description,
                # TODO: Uncomment the following lines when we move to file on
                # the production instance of Bugzilla. Filling `regressed_by` or
                # `flags` on bugzilla-dev will cause "bug does not exist" errors.
                # "regressed_by": signature.regressed_by,
                # "flags": flags,
            }

            if self.dryrun:
                logger.info("Dry-run bug:")
                pprint.pprint(bug_data)
                # Fake sequential ids so the report table still has rows.
                bug_id = str(len(bugs) + 1)
            else:
                # NOTE: When moving to production:
                # - Use Bugzilla instead of DevBugzilla
                # - Drop the DevBugzilla class
                # - Update the bug URL `file_crash_bug.html`
                # - Drop the bug link `file_crash_bug_description.md.jinja`
                # - Fill the `regressed_by` and `flags` fields
                # - Create the bug using `utils.create_bug``
                resp = requests.post(
                    url=DevBugzilla.API_URL,
                    json=bug_data,
                    headers=DevBugzilla([]).get_header(),
                    verify=True,
                    timeout=DevBugzilla.TIMEOUT,
                )
                resp.raise_for_status()
                bug = resp.json()
                bug_id = str(bug["id"])
                # TODO: log the created bugs info somewhere (e.g., DB,
                # spreadsheet, or LabelStudio)

            # NOTE(review): "component" stores the component object here while
            # bug_data above uses its .name — confirm the email template
            # renders the object as intended.
            bugs[bug_id] = {
                "id": bug_id,
                "summary": title,
                "component": signature.crash_component,
            }

        logger.debug("Total of %d bugs have been filed", len(bugs))

        return bugs
# Allow running the rule standalone (cron invokes it via `python -m`).
if __name__ == "__main__":
    FileCrashBug().run()

Просмотреть файл

@ -73,4 +73,7 @@ python -m bugbot.rules.multifix_regression --production
# Copy metadata from duplicates
python -m bugbot.rules.duplicate_copy_metadata --production
# File bugs for new actionable crashes
python -m bugbot.rules.file_crash_bug --production
# Shared end-of-run steps for the cron scripts — presumably cleanup/teardown;
# confirm in scripts/cron_common_end.sh.
source ./scripts/cron_common_end.sh

Просмотреть файл

@ -0,0 +1,21 @@
{#- Email report table for the crash bugs filed by the file_crash_bug rule.
Context: `data` holds (component, bug id, summary) rows, matching
FileCrashBug.columns(); `plural` and `table_attrs` are presumably helpers
supplied by the shared mail-rendering environment — confirm against the
rule runner. The `-#}` trim keeps rendered output byte-identical.
NOTE(review): bug links target bugzilla-dev; switch to
bugzilla.mozilla.org when the rule moves to production. -#}
<p>BugBot filed the following crash {{ plural('bug', data) }}:</p>
<table {{ table_attrs }}>
<thead>
<tr>
<th>Component</th>
<th>Bug</th>
<th>Summary</th>
</tr>
</thead>
<tbody>
{% for i, (comp, bugid, summary) in enumerate(data) -%}
<tr {% if i % 2 == 0 %}bgcolor="#E0E0E0"{%- endif %}>
<td>{{ comp | e }}</td>
<td>
<a href="https://bugzilla-dev.allizom.org/show_bug.cgi?id={{ bugid }}">{{ bugid }}</a>
</td>
<td>{{ summary | e }}</td>
</tr>
{% endfor -%}
</tbody>
</table>

Просмотреть файл

@ -0,0 +1,58 @@
{#
Markdown body of the bug description posted by the file_crash_bug rule.

Context variables: `uuid`, `java_stack_trace`, `frames`, `moz_crash_reason`,
`reason`, and `crashing_thread` presumably come from
socorro_util.generate_bug_description_data() (confirm against that helper);
`signature` exposes the SignatureStats-style properties used below, and
`needinfo_regression_author` is a boolean passed by the rule. New comments
are kept inside this existing block so rendered output is unchanged.

Part of this template was extracted from Socorro's repository:
https://github.com/mozilla-services/socorro/blob/ff8f5d6b41689e34a6b800577d8ffe383e1e62eb/webapp/crashstats/crashstats/jinja2/crashstats/bug_comment.txt
https://github.com/mozilla-services/socorro/blob/ff8f5d6b41689e34a6b800577d8ffe383e1e62eb/webapp/crashstats/signature/jinja2/signature/signature_summary.html#L7-L41
#}
Crash report: https://crash-stats.mozilla.org/report/index/{{ uuid }}
{% if java_stack_trace %}
Java stack trace:
```
{{ java_stack_trace|truncate(5000, True)|safe }}
```
{% elif frames %}
{% if moz_crash_reason %}
MOZ_CRASH Reason: ```{{ moz_crash_reason|safe }}```
{% elif reason %}
Reason: ```{{ reason|safe }}```
{% endif %}
{% if crashing_thread is none %}
No crashing thread identified; using thread 0.
{% endif %}
Top {{ frames|length }} frames of crashing thread:
```
{% for frame in frames -%}
{{ frame.frame|safe}} {{ frame.module|safe }} {{ frame.signature|safe }} {{ frame.source|safe }}
{% endfor -%}
```
{% endif %}
Here are some insights about the crash signature based on recent data at the time of reporting the bug:
- **Crash kind:** {{ "Plugin Crash" if signature.is_plugin_crash else "Browser Crash" }}
- **Is startup crash:** {{ "Yes - " if signature.is_startup_related_crash else "No" }}
{%- if signature.is_startup_crash -%}
all crashes happened during startup
{%- elif signature.is_potential_startup_crash -%}
{{ signature.num_startup_crashes }} out of {{ signature.num_crashes }} crashes happened during startup
{%- elif signature.is_startup_window_crash -%}
more than half of the crashes happened during the first minute after launch
{%- endif %}
- **Has user comments:** {% if signature.has_user_comments -%}
[Yes]({{ signature.user_comments_page_url }})
{%- else -%}
No
{%- endif %}
{% if signature.regressed_by %}
By analyzing the backtrace, the regression may have been introduced by a {{ "patch [1]" if signature.regressed_by_patch else "patch"}} to fix [Bug {{ signature.regressed_by }}](https://bugzilla.mozilla.org/show_bug.cgi?id={{ signature.regressed_by }}).
{% if signature.regressed_by_patch %}
[1] https://hg.mozilla.org/mozilla-central/rev?node={{ signature.regressed_by_patch }}
{%- endif %}
{% endif %}
{% if needinfo_regression_author %}
:{{ signature.regressed_by_author["nick"] }}, since you are the author of the potential regressor, could you please take a look?
{% endif %}