From 3e3829d0217ce4dbb2049fc5bc7aca63913ff3ba Mon Sep 17 00:00:00 2001 From: Suhaib Mujahid Date: Wed, 28 Jun 2023 06:05:56 -0400 Subject: [PATCH] Add a new rule to automatically file bugs for new actionable crashes (#2117) Co-authored-by: Marco Castelluccio --- bugbot/config.py | 8 +- bugbot/crash/__init__.py | 0 bugbot/crash/analyzer.py | 738 ++++++++++++++++++ bugbot/crash/socorro_util.py | 399 ++++++++++ bugbot/rules/file_crash_bug.py | 160 ++++ scripts/cron_run_hourly.sh | 3 + templates/file_crash_bug.html | 21 + templates/file_crash_bug_description.md.jinja | 58 ++ 8 files changed, 1386 insertions(+), 1 deletion(-) create mode 100644 bugbot/crash/__init__.py create mode 100644 bugbot/crash/analyzer.py create mode 100644 bugbot/crash/socorro_util.py create mode 100644 bugbot/rules/file_crash_bug.py create mode 100644 templates/file_crash_bug.html create mode 100644 templates/file_crash_bug_description.md.jinja diff --git a/bugbot/config.py b/bugbot/config.py index 44a7dc44..d28eb8f5 100644 --- a/bugbot/config.py +++ b/bugbot/config.py @@ -15,7 +15,7 @@ class MyConfig(config.Config): def __init__(self): super(MyConfig, self).__init__() if not os.path.exists(MyConfig.PATH): - self.conf = {"bz_api_key": "", "bz_api_key_nomail": ""} + self.conf = {"bz_api_key": "", "bz_api_key_nomail": "", "socorro_token": ""} else: with open(MyConfig.PATH) as In: self.conf = json.load(In) @@ -28,12 +28,18 @@ class MyConfig(config.Config): "Your config.json file must contain a Bugzilla token for an account that doesn't trigger bugmail (for testing, you can use the same token as bz_api_key)" ) + if "socorro_token" not in self.conf: + raise Exception("Your config.json file must contain a Socorro token") + def get(self, section, option, default=None, type=str): if section == "Bugzilla": if option == "token": return self.conf["bz_api_key"] if option == "nomail-token": return self.conf["bz_api_key_nomail"] + elif section == "Socorro": + if option == "token": + return 
self.conf["socorro_token"] elif section == "User-Agent": return "bugbot" return default diff --git a/bugbot/crash/__init__.py b/bugbot/crash/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/bugbot/crash/analyzer.py b/bugbot/crash/analyzer.py new file mode 100644 index 00000000..a75fc710 --- /dev/null +++ b/bugbot/crash/analyzer.py @@ -0,0 +1,738 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http://mozilla.org/MPL/2.0/. + +import itertools +import re +from collections import defaultdict +from datetime import date, timedelta +from functools import cached_property +from typing import Iterable, Iterator + +from libmozdata import bugzilla, clouseau, connection, socorro +from libmozdata import utils as lmdutils +from libmozdata.bugzilla import Bugzilla +from libmozdata.connection import Connection + +from bugbot import logger, utils +from bugbot.components import ComponentName +from bugbot.crash import socorro_util + + +# TODO: Move this to libmozdata +def generate_signature_page_url(params: dict, tab: str) -> str: + """Generate a URL to the signature page on Socorro + + Args: + params: the parameters for the search query. + tab: the page tab that should be selected. + + Returns: + The URL of the signature page on Socorro + """ + web_url = socorro.Socorro.CRASH_STATS_URL + query = lmdutils.get_params_for_url(params) + return f"{web_url}/signature/{query}#{tab}" + + +# NOTE: At this point, we will file bugs on bugzilla-dev. Once we are confident +# that the bug filing is working as expected, we can switch to filing bugs in +# the production instance of Bugzilla. 
+class DevBugzilla(Bugzilla): + URL = "https://bugzilla-dev.allizom.org" + API_URL = URL + "/rest/bug" + ATTACHMENT_API_URL = API_URL + "/attachment" + TOKEN = utils.get_login_info()["bz_api_key_dev"] + + +class NoCrashReportFoundError(Exception): + """There are no crash reports that meet the required criteria.""" + + +class ClouseauDataAnalyzer: + """Analyze the data returned by Crash Clouseau""" + + MINIMUM_CLOUSEAU_SCORE_THRESHOLD: int = 8 + DEFAULT_CRASH_COMPONENT = ComponentName("Core", "General") + + def __init__(self, reports: Iterable[dict]): + self._clouseau_reports = reports + + @cached_property + def max_clouseau_score(self): + """The maximum Clouseau score in the crash reports.""" + if not self._clouseau_reports: + return 0 + return max(report["max_score"] for report in self._clouseau_reports) + + @cached_property + def regressed_by_potential_bug_ids(self) -> set[int]: + """The IDs for the bugs that their patches could have caused the crash.""" + minimum_accepted_score = max( + self.MINIMUM_CLOUSEAU_SCORE_THRESHOLD, self.max_clouseau_score + ) + return { + changeset["bug_id"] + for report in self._clouseau_reports + if report["max_score"] >= minimum_accepted_score + for changeset in report["changesets"] + if changeset["max_score"] >= minimum_accepted_score + and not changeset["is_merge"] + and not changeset["is_backedout"] + } + + @cached_property + def regressed_by_patch(self) -> str | None: + """The hash of the patch that could have caused the crash.""" + minimum_accepted_score = max( + self.MINIMUM_CLOUSEAU_SCORE_THRESHOLD, self.max_clouseau_score + ) + potential_patches = { + changeset["changeset"] + for report in self._clouseau_reports + if report["max_score"] >= minimum_accepted_score + for changeset in report["changesets"] + if changeset["max_score"] >= minimum_accepted_score + and not changeset["is_merge"] + and not changeset["is_backedout"] + } + if len(potential_patches) == 1: + return next(iter(potential_patches)) + return None + + 
@cached_property + def regressed_by(self) -> int | None: + """The ID of the bug that one of its patches could have caused + the crash. + + If there are multiple bugs, the value will be `None`. + """ + bug_ids = self.regressed_by_potential_bug_ids + if len(bug_ids) == 1: + return next(iter(bug_ids)) + return None + + @cached_property + def regressed_by_potential_bugs(self) -> list[dict]: + """The bugs whose patches could have caused the crash.""" + + def handler(bug: dict, data: list): + data.append(bug) + + bugs: list[dict] = [] + Bugzilla( + bugids=self.regressed_by_potential_bug_ids, + include_fields=[ + "id", + "assigned_to", + "product", + "component", + ], + bughandler=handler, + bugdata=bugs, + ).wait() + + return bugs + + @cached_property + def regressed_by_author(self) -> dict | None: + """The author of the patch that could have caused the crash. + + If there are multiple regressors, the value will be `None`. + + The regressor bug assignee is considered as the author, even if the + assignee is not the patch author. + """ + + if not self.regressed_by: + return None + + bug = self.regressed_by_potential_bugs[0] + assert bug["id"] == self.regressed_by + return bug["assigned_to_detail"] + + @cached_property + def crash_component(self) -> ComponentName: + """The component that the crash belongs to. + + If there are multiple components, the value will be the default one. 
+ """ + potential_components = { + ComponentName(bug["product"], bug["component"]) + for bug in self.regressed_by_potential_bugs + } + if len(potential_components) == 1: + return next(iter(potential_components)) + return self.DEFAULT_CRASH_COMPONENT + + +class SocorroDataAnalyzer(socorro_util.SignatureStats): + """Analyze the data returned by Socorro.""" + + _bugzilla_os_legal_values = None + _bugzilla_cpu_legal_values_map = None + _platforms = [ + {"short_name": "win", "name": "Windows"}, + {"short_name": "mac", "name": "Mac OS X"}, + {"short_name": "lin", "name": "Linux"}, + {"short_name": "and", "name": "Android"}, + {"short_name": "unknown", "name": "Unknown"}, + ] + + def __init__( + self, + signature: dict, + num_total_crashes: int, + ): + super().__init__(signature, num_total_crashes, platforms=self._platforms) + + @classmethod + def to_bugzilla_op_sys(cls, op_sys: str) -> str: + """Return the corresponding OS name in Bugzilla for the provided OS name + from Socorro. + + If the OS name is not recognized, return "Other". + """ + if cls._bugzilla_os_legal_values is None: + cls._bugzilla_os_legal_values = set( + bugzilla.BugFields.fetch_field_values("op_sys") + ) + + if op_sys in cls._bugzilla_os_legal_values: + return op_sys + + if op_sys.startswith("OS X ") or op_sys.startswith("macOS "): + op_sys = "macOS" + elif op_sys.startswith("Windows"): + op_sys = "Windows" + elif "Linux" in op_sys or op_sys.startswith("Ubuntu"): + op_sys = "Linux" + else: + op_sys = "Other" + + return op_sys + + @property + def bugzilla_op_sys(self) -> str: + """The name of the OS where the crash happens. + + The value is one of the legal values for Bugzilla's `op_sys` field. + + - If no OS name is found, the value will be "Unspecified". + - If the OS name is not recognized, the value will be "Other". + - If multiple OS names are found, the value will be "All". Unless the OS + names can be resolved to a common name without a version. 
For example, + "Windows 10" and "Windows 7" will become "Windows". + """ + all_op_sys = { + self.to_bugzilla_op_sys(op_sys["term"]) + for op_sys in self.signature["facets"]["platform_pretty_version"] + } + + if len(all_op_sys) > 1: + # Resolve to root OS name by removing the version number. + all_op_sys = {op_sys.split(" ")[0] for op_sys in all_op_sys} + + if len(all_op_sys) == 2 and "Other" in all_op_sys: + # TODO: explain this workaround. + all_op_sys.remove("Other") + + if len(all_op_sys) == 1: + return next(iter(all_op_sys)) + + if len(all_op_sys) == 0: + return "Unspecified" + + return "All" + + @classmethod + def to_bugzilla_cpu(cls, cpu: str) -> str: + """Return the corresponding CPU name in Bugzilla for the provided name + from Socorro. + + If the CPU is not recognized, return "Other". + """ + if cls._bugzilla_cpu_legal_values_map is None: + cls._bugzilla_cpu_legal_values_map = { + value.lower(): value + for value in bugzilla.BugFields.fetch_field_values("rep_platform") + } + + return cls._bugzilla_cpu_legal_values_map.get(cpu, "Other") + + @property + def bugzilla_cpu_arch(self) -> str: + """The CPU architecture of the devices where the crash happens. + + The value is one of the legal values for Bugzilla's `rep_platform` field. + + - If no CPU architecture is found, the value will be "Unspecified". + - If the CPU architecture is not recognized, the value will be "Other". + - If multiple CPU architectures are found, the value will "All". + """ + all_cpu_arch = { + self.to_bugzilla_cpu(cpu["term"]) + for cpu in self.signature["facets"]["cpu_arch"] + } + + if len(all_cpu_arch) == 2 and "Other" in all_cpu_arch: + all_cpu_arch.remove("Other") + + if len(all_cpu_arch) == 1: + return next(iter(all_cpu_arch)) + + if len(all_cpu_arch) == 0: + return "Unspecified" + + return "All" + + @property + def user_comments_page_url(self) -> str: + """The URL to the Signature page on Socorro where the Comments tab is + selected. 
+ """ + start_date = date.today() - timedelta(weeks=26) + params = { + "signature": self.signature_term, + "date": socorro.SuperSearch.get_search_date(start_date), + } + return generate_signature_page_url(params, "comments") + + @property + def num_user_comments(self) -> int: + """The number of crash reports with user comments.""" + # TODO: count useful/interesting user comments (e.g., exclude one word comments) + return self.signature["facets"]["cardinality_user_comments"]["value"] + + @property + def has_user_comments(self) -> bool: + """Whether the crash signature has any reports with a user comment.""" + return self.num_user_comments > 0 + + @property + def top_proto_signature(self) -> str: + """The proto signature that occurs the most.""" + return self.signature["facets"]["proto_signature"][0]["term"] + + @property + def num_top_proto_signature_crashes(self) -> int: + """The number of crashes for the most occurring proto signature.""" + return self.signature["facets"]["proto_signature"][0]["count"] + + def _build_ids(self) -> Iterator[int]: + """Yields the build IDs where the crash occurred.""" + for build_id in self.signature["facets"]["build_id"]: + yield build_id["term"] + + @property + def top_build_id(self) -> int: + """The build ID where most crashes occurred.""" + return self.signature["facets"]["build_id"][0]["term"] + + +class SignatureAnalyzer(SocorroDataAnalyzer, ClouseauDataAnalyzer): + """Analyze the data related to a signature. + + This includes data from Socorro and Clouseau. 
+ """ + + def __init__( + self, + socorro_signature: dict, + num_total_crashes: int, + clouseau_reports: list[dict], + ): + SocorroDataAnalyzer.__init__(self, socorro_signature, num_total_crashes) + ClouseauDataAnalyzer.__init__(self, clouseau_reports) + + def _fetch_crash_reports( + self, + proto_signature: str, + build_id: int | Iterable[int], + limit: int = 1, + ) -> Iterator[dict]: + params = { + "proto_signature": "=" + proto_signature, + "build_id": build_id, + "_columns": [ + "uuid", + ], + "_results_number": limit, + } + + def handler(res: dict, data: dict): + data.update(res) + + data: dict = {} + socorro.SuperSearch(params=params, handler=handler, handlerdata=data).wait() + + yield from data["hits"] + + def fetch_representative_processed_crash(self) -> dict: + """Fetch a processed crash to represent the signature. + + This could fetch multiple processed crashes and return the one that is + most likely to be useful. + """ + limit_to_top_proto_signature = ( + self.num_top_proto_signature_crashes / self.num_crashes > 0.6 + ) + + reports = itertools.chain( + # Reports with a higher score from clouseau are more likely to be + # useful. + sorted( + self._clouseau_reports, + key=lambda report: report["max_score"], + reverse=True, + ), + # Next we try find reports from the top crashing build because they + # are likely to be representative. 
+ self._fetch_crash_reports(self.top_proto_signature, self.top_build_id), + self._fetch_crash_reports(self.top_proto_signature, self._build_ids()), + ) + for report in reports: + uuid = report["uuid"] + processed_crash = socorro.ProcessedCrash.get_processed(uuid)[uuid] + if ( + not limit_to_top_proto_signature + or processed_crash["proto_signature"] == self.top_proto_signature + ): + # TODO(investigate): maybe we should check if the stack is + # corrupted (ask gsvelto or willkg about how to detect that) + return processed_crash + + raise NoCrashReportFoundError( + f"No crash report found with the most frequent proto signature for {self.signature_term}." + ) + + +class SignaturesDataFetcher: + """Fetch the data related to the given signatures.""" + + MEMORY_ACCESS_ERROR_REASONS = ( + # On Windows: + "EXCEPTION_ACCESS_VIOLATION_READ", + "EXCEPTION_ACCESS_VIOLATION_WRITE", + "EXCEPTION_ACCESS_VIOLATION_EXEC", + # On Linux: + "SIGSEGV / SEGV_MAPERR", + "SIGSEGV / SEGV_ACCERR", + ) + + EXCLUDED_MOZ_REASON_STRINGS = ( + "MOZ_CRASH(OOM)", + "MOZ_CRASH(Out of memory)", + "out of memory", + "Shutdown hanging", + # TODO(investigate): do we need to exclude signatures that their reason + # contains `[unhandlable oom]`? + # Example: arena_t::InitChunk | arena_t::AllocRun | arena_t::MallocLarge | arena_t::Malloc | BaseAllocator::malloc | Allocator::malloc | PageMalloc + # "[unhandlable oom]", + ) + + # If any of the crash reason starts with any of the following, then it is + # Network or I/O error. + EXCLUDED_IO_ERROR_REASON_PREFIXES = ( + "EXCEPTION_IN_PAGE_ERROR_READ", + "EXCEPTION_IN_PAGE_ERROR_WRITE", + "EXCEPTION_IN_PAGE_ERROR_EXEC", + ) + + # TODO(investigate): do we need to exclude all these signatures prefixes? 
+ EXCLUDED_SIGNATURE_PREFIXES = ( + "OOM | ", + "bad hardware | ", + "shutdownhang | ", + ) + + def __init__( + self, + signatures: Iterable[str], + product: str = "Firefox", + channel: str = "nightly", + ): + self._signatures = set(signatures) + self._product = product + self._channel = channel + + @classmethod + def find_new_actionable_crashes( + cls, + product: str, + channel: str, + days_to_check: int = 7, + days_without_crashes: int = 7, + ) -> "SignaturesDataFetcher": + """Find new actionable crashes. + + Args: + product: The product to check. + channel: The release channel to check. + days_to_check: The number of days to check for crashes. + days_without_crashes: The number of days without crashes before the + `days_to_check` to consider the signature new. + + Returns: + A list of actionable signatures. + """ + duration = days_to_check + days_without_crashes + end_date = lmdutils.get_date_ymd("today") + start_date = end_date - timedelta(duration) + earliest_allowed_date = lmdutils.get_date_str( + end_date - timedelta(days_to_check) + ) + date_range = socorro.SuperSearch.get_search_date(start_date, end_date) + + params = { + "product": product, + "release_channel": channel, + "date": date_range, + # TODO(investigate): should we do a local filter instead of the + # following (should we exclude the signature if one of the crashes + # is a shutdown hang?): + # If the `ipc_shutdown_state` or `shutdown_progress` field are + # non-empty then it's a shutdown hang. + "ipc_shutdown_state": "__null__", + "shutdown_progress": "__null__", + # TODO(investigate): should we use the following instead of the + # local filter. 
+ # "oom_allocation_size": "!__null__", + "_aggs.signature": [ + "moz_crash_reason", + "reason", + "_histogram.date", + "_cardinality.install_time", + "_cardinality.oom_allocation_size", + ], + "_results_number": 0, + "_facets_size": 10000, + } + + def handler(search_resp: dict, data: list): + logger.debug( + "Total of %d signatures received from Socorro", + len(search_resp["facets"]["signature"]), + ) + + for crash in search_resp["facets"]["signature"]: + signature = crash["term"] + if any( + signature.startswith(excluded_prefix) + for excluded_prefix in cls.EXCLUDED_SIGNATURE_PREFIXES + ): + # Ignore signatures that start with any of the excluded prefixes. + continue + + facets = crash["facets"] + installations = facets["cardinality_install_time"]["value"] + if installations <= 1: + # Ignore crashes that only happen on one installation. + continue + + first_date = facets["histogram_date"][0]["term"] + if first_date < earliest_allowed_date: + # The crash is not new, skip it. + continue + + if any( + reason["term"].startswith(io_error_prefix) + for reason in facets["reason"] + for io_error_prefix in cls.EXCLUDED_IO_ERROR_REASON_PREFIXES + ): + # Ignore Network or I/O error crashes. + continue + + if crash["count"] < 20: + # For signatures with low volume, having multiple types of + # memory errors indicates potential bad hardware crashes. + num_memory_error_types = sum( + reason["term"] in cls.MEMORY_ACCESS_ERROR_REASONS + for reason in facets["reason"] + ) + if num_memory_error_types > 1: + # Potential bad hardware crash, skip it. + continue + + # TODO: Add a filter using the `possible_bit_flips_max_confidence` + # field to exclude bad hardware crashes. The filed is not available yet. + # See: https://bugzilla.mozilla.org/show_bug.cgi?id=1816669#c3 + + # TODO(investigate): is this needed since we are already + # filtering signatures that start with "OOM | " + if facets["cardinality_oom_allocation_size"]["value"]: + # If one of the crashes is an OOM crash, skip it. 
+ continue + + # TODO(investigate): do we need to check for the `moz_crash_reason` + moz_crash_reasons = facets["moz_crash_reason"] + if moz_crash_reasons and any( + excluded_reason in reason["term"] + for reason in moz_crash_reasons + for excluded_reason in cls.EXCLUDED_MOZ_REASON_STRINGS + ): + continue + + data.append(signature) + + signatures: list = [] + socorro.SuperSearch( + params=params, + handler=handler, + handlerdata=signatures, + ).wait() + + logger.debug( + "Total of %d signatures left after applying the filtering criteria", + len(signatures), + ) + + return cls(signatures, product, channel) + + def fetch_clouseau_crash_reports(self) -> dict[str, list]: + """Fetch the crash reports data from Crash Clouseau.""" + signature_reports = clouseau.Reports.get_by_signatures( + self._signatures, + product=self._product, + channel=self._channel, + ) + + logger.debug( + "Total of %d signatures received from Clouseau", len(signature_reports) + ) + + return signature_reports + + def fetch_socorro_info(self) -> tuple[list[dict], int]: + """Fetch the signature data from Socorro.""" + # TODO(investigate): should we increase the duration to 6 months? + duration = timedelta(weeks=1) + end_date = lmdutils.get_date_ymd("today") + start_date = end_date - duration + date_range = socorro.SuperSearch.get_search_date(start_date, end_date) + + params = { + "product": self._product, + # TODO(investigate): should we included all release channels? + "release_channel": self._channel, + # TODO(investigate): should we limit based on the build date as well? 
+ "date": date_range, + # TODO: split signatures into chunks to avoid very long query URLs + "signature": ["=" + signature for signature in self._signatures], + "_aggs.signature": [ + "build_id", + "cpu_arch", + "proto_signature", + "_cardinality.user_comments", + "cpu_arch", + "platform_pretty_version", + # The following are needed for SignatureStats: + "platform", + "is_garbage_collecting", + "_cardinality.install_time", + "startup_crash", + "_histogram.uptime", + "process_type", + ], + "_results_number": 0, + "_facets_size": 10000, + } + + def handler(search_results: dict, data: dict): + data["num_total_crashes"] = search_results["total"] + data["signatures"] = search_results["facets"]["signature"] + + data: dict = {} + socorro.SuperSearchUnredacted( + params=params, + handler=handler, + handlerdata=data, + ).wait() + + logger.debug( + "Fetch info from Socorro for %d signatures", len(data["signatures"]) + ) + + return data["signatures"], data["num_total_crashes"] + + def fetch_bugs(self, include_fields: list[str] = None) -> dict[str, list[dict]]: + """Fetch bugs that are filed against the given signatures.""" + + params_base: dict = { + "include_fields": [ + "cf_crash_signature", + ], + } + + if include_fields: + params_base["include_fields"].extend(include_fields) + + params_list = [] + for signatures_chunk in Connection.chunks(list(self._signatures), 30): + params = params_base.copy() + n = int(utils.get_last_field_num(params)) + params[f"f{n}"] = "OP" + params[f"j{n}"] = "OR" + for signature in signatures_chunk: + n += 1 + params[f"f{n}"] = "cf_crash_signature" + params[f"o{n}"] = "regexp" + params[f"v{n}"] = rf"\[(@ |@){re.escape(signature)}( \]|\])" + params[f"f{n+1}"] = "CP" + params_list.append(params) + + signatures_bugs: dict = defaultdict(list) + + def handler(res, data): + for bug in res["bugs"]: + for signature in utils.get_signatures(bug["cf_crash_signature"]): + if signature in self._signatures: + data[signature].append(bug) + + Bugzilla( + 
queries=[ + connection.Query(Bugzilla.API_URL, params, handler, signatures_bugs) + for params in params_list + ], + ).wait() + + # TODO: remove the call to DevBugzilla after moving to production + DevBugzilla( + queries=[ + connection.Query(DevBugzilla.API_URL, params, handler, signatures_bugs) + for params in params_list + ], + ).wait() + + logger.debug( + "Total of %d signatures already have bugs filed", len(signatures_bugs) + ) + + return signatures_bugs + + def analyze(self) -> list[SignatureAnalyzer]: + """Analyze the data related to the signatures.""" + bugs = self.fetch_bugs() + # TODO(investigate): For now, we are ignoring signatures that have bugs + # filed even if they are closed long time ago. We should investigate + # whether we should include the ones with closed bugs. For example, if + # the bug was closed as Fixed years ago. + self._signatures.difference_update(bugs.keys()) + + clouseau_reports = self.fetch_clouseau_crash_reports() + # TODO(investigate): For now, we are ignoring signatures that are not + # analyzed by clouseau. We should investigate why they are not analyzed + # and whether we should include them. + self._signatures.intersection_update(clouseau_reports.keys()) + + signatures, num_total_crashes = self.fetch_socorro_info() + logger.debug("Total of %d signatures will be analyzed", len(signatures)) + + return [ + SignatureAnalyzer( + signature, + num_total_crashes, + clouseau_reports[signature["term"]], + ) + for signature in signatures + ] diff --git a/bugbot/crash/socorro_util.py b/bugbot/crash/socorro_util.py new file mode 100644 index 00000000..9f423065 --- /dev/null +++ b/bugbot/crash/socorro_util.py @@ -0,0 +1,399 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http://mozilla.org/MPL/2.0/. + +"""The code in this module was borrowed from Socorro (some parts were adjusted). 
+Each function, class, or dictionary is documented with a link to the original +source. +""" + + +import re +from functools import cached_property +from itertools import islice + + +# Original Socorro code: https://github.com/mozilla-services/socorro/blob/ff8f5d6b41689e34a6b800577d8ffe383e1e62eb/webapp/crashstats/crashstats/templatetags/jinja_helpers.py#L182-L203 +def generate_bug_description_data(report) -> dict: + crashing_thread = get_crashing_thread(report) + parsed_dump = get_parsed_dump(report) or {} + + frames = None + threads = parsed_dump.get("threads") + if threads: + thread_index = crashing_thread or 0 + frames = bugzilla_thread_frames(parsed_dump["threads"][thread_index]) + + return { + "uuid": report["uuid"], + # NOTE(willkg): this is the redacted stack trace--not the raw one that can + # have PII in it + "java_stack_trace": report.get("java_stack_trace", None), + # NOTE(willkg): this is the redacted mozcrashreason--not the raw one that + # can have PII in it + "moz_crash_reason": report.get("moz_crash_reason", None), + "reason": report.get("reason", None), + "frames": frames, + "crashing_thread": crashing_thread, + } + + +# Original Socorro code: https://github.com/mozilla-services/socorro/blob/ff8f5d6b41689e34a6b800577d8ffe383e1e62eb/webapp/crashstats/crashstats/templatetags/jinja_helpers.py#L227-L278 +def bugzilla_thread_frames(thread): + """Build frame information for bug creation link + + Extract frame info for the top frames of a crashing thread to be included in the + Bugzilla summary when reporting the crash. 
+ + :arg thread: dict of thread information including "frames" list + + :returns: list of frame information dicts + + """ + + def frame_generator(thread): + """Yield frames in a thread factoring in inlines""" + for frame in thread["frames"]: + for inline in frame.get("inlines") or []: + yield { + "frame": frame.get("frame", "?"), + "module": frame.get("module", ""), + "signature": inline["function"], + "file": inline["file"], + "line": inline["line"], + } + + yield frame + + # We only want to include 10 frames in the link + MAX_FRAMES = 10 + + frames = [] + for frame in islice(frame_generator(thread), MAX_FRAMES): + # Source is an empty string if data isn't available + source = frame.get("file") or "" + if frame.get("line"): + source += ":{}".format(frame["line"]) + + signature = frame.get("signature") or "" + + # Remove function arguments + if not signature.startswith("(unloaded"): + signature = re.sub(r"\(.*\)", "", signature) + + frames.append( + { + "frame": frame.get("frame", "?"), + "module": frame.get("module") or "?", + "signature": signature, + "source": source, + } + ) + + return frames + + +# Original Socorro code: https://github.com/mozilla-services/socorro/blob/ff8f5d6b41689e34a6b800577d8ffe383e1e62eb/webapp/crashstats/crashstats/utils.py#L343-L359 +def enhance_json_dump(dump, vcs_mappings): + """ + Add some information to the stackwalker's json_dump output + for display. Mostly applying vcs_mappings to stack frames. 
+ """ + for thread_index, thread in enumerate(dump.get("threads", [])): + if "thread" not in thread: + thread["thread"] = thread_index + + frames = thread["frames"] + for frame in frames: + enhance_frame(frame, vcs_mappings) + for inline in frame.get("inlines") or []: + enhance_frame(inline, vcs_mappings) + + thread["frames"] = frames + return dump + + +# https://github.com/mozilla-services/socorro/blob/ff8f5d6b41689e34a6b800577d8ffe383e1e62eb/webapp/crashstats/crashstats/utils.py#L259-L340 +def enhance_frame(frame, vcs_mappings): + """Add additional info to a stack frame + + This adds signature and source links from vcs_mappings. + + """ + # If this is a truncation frame, then we don't need to enhance it in any way + if frame.get("truncated") is not None: + return + + if frame.get("function"): + # Remove spaces before all stars, ampersands, and commas + function = re.sub(r" (?=[\*&,])", "", frame["function"]) + # Ensure a space after commas + function = re.sub(r",(?! )", ", ", function) + frame["function"] = function + signature = function + elif frame.get("file") and frame.get("line"): + signature = "%s#%d" % (frame["file"], frame["line"]) + elif frame.get("module") and frame.get("module_offset"): + signature = "%s@%s" % ( + frame["module"], + strip_leading_zeros(frame["module_offset"]), + ) + elif frame.get("unloaded_modules"): + first_module = frame["unloaded_modules"][0] + if first_module.get("offsets"): + signature = "(unloaded %s@%s)" % ( + first_module.get("module") or "", + strip_leading_zeros(first_module.get("offsets")[0]), + ) + else: + signature = "(unloaded %s)" % first_module + else: + signature = "@%s" % frame["offset"] + + frame["signature"] = signature + if signature.startswith("(unloaded"): + # If the signature is based on an unloaded module, leave the string as is + frame["short_signature"] = signature + else: + # Remove arguments which are enclosed in parens + frame["short_signature"] = re.sub(r"\(.*\)", "", signature) + + if frame.get("file"): 
        # NOTE(review): fragment of a frame-enhancement helper whose `def`
        # starts before this chunk (Socorro's enhance_frame); it rewrites
        # frame["file"] (format "vcstype:root:path:revision") into a short
        # display name plus a "source_link" URL built from `vcs_mappings`.
        vcsinfo = frame["file"].split(":")
        if len(vcsinfo) == 4:
            vcstype, root, vcs_source_file, revision = vcsinfo
            if "/" in root:
                # The root is something like 'hg.mozilla.org/mozilla-central'
                server, repo = root.split("/", 1)
            else:
                # E.g. 'gecko-generated-sources' or something without a '/'
                repo = server = root

            if (
                vcs_source_file.count("/") > 1
                and len(vcs_source_file.split("/")[0]) == 128
            ):
                # In this case, the 'vcs_source_file' will be something like
                # '{SHA-512 hex}/ipc/ipdl/PCompositorBridgeChild.cpp'
                # So drop the sha part for the sake of the 'file' because
                # we don't want to display a 128 character hex code in the
                # hyperlink text.
                vcs_source_file_display = "/".join(vcs_source_file.split("/")[1:])
            else:
                # Leave it as is if it's not unwieldy long.
                vcs_source_file_display = vcs_source_file

            if vcstype in vcs_mappings:
                if server in vcs_mappings[vcstype]:
                    link = vcs_mappings[vcstype][server]
                    frame["file"] = vcs_source_file_display
                    frame["source_link"] = link % {
                        "repo": repo,
                        "file": vcs_source_file,
                        "revision": revision,
                        "line": frame["line"],
                    }
            else:
                # Unknown VCS type: fall back to showing just the basename.
                path_parts = vcs_source_file.split("/")
                frame["file"] = path_parts.pop()


# Original Socorro code: https://github.com/mozilla-services/socorro/blob/ff8f5d6b41689e34a6b800577d8ffe383e1e62eb/socorro/signature/utils.py#L405-L422
def strip_leading_zeros(text):
    """Strips leading zeros from a hex string.

    Example:

    >>> strip_leading_zeros("0x0000000000032ec0")
    "0x32ec0"

    :param text: the text to strip leading zeros from

    :returns: stripped text

    """
    try:
        return hex(int(text, base=16))
    except (ValueError, TypeError):
        # Not parseable as hex (or not a string at all): return unchanged.
        return text


# Original Socorro code: https://github.com/mozilla-services/socorro/blob/ff8f5d6b41689e34a6b800577d8ffe383e1e62eb/webapp/crashstats/settings/base.py#L268-L293
# Link to source if possible
# Maps VCS type -> server -> %-format URL template. The templates consume
# the "repo", "file", "revision" and "line" keys built in the frame
# enhancement code above.
VCS_MAPPINGS = {
    "cvs": {
        "cvs.mozilla.org": (
            "http://bonsai.mozilla.org/cvsblame.cgi?file=%(file)s&rev=%(revision)s&mark=%(line)s#%(line)s"
        )
    },
    "hg": {
        "hg.mozilla.org": (
            "https://hg.mozilla.org/%(repo)s/file/%(revision)s/%(file)s#l%(line)s"
        )
    },
    "git": {
        "git.mozilla.org": (
            "http://git.mozilla.org/?p=%(repo)s;a=blob;f=%(file)s;h=%(revision)s#l%(line)s"
        ),
        "github.com": (
            "https://github.com/%(repo)s/blob/%(revision)s/%(file)s#L%(line)s"
        ),
    },
    "s3": {
        "gecko-generated-sources": (
            "/sources/highlight/?url=https://gecko-generated-sources.s3.amazonaws.com/%(file)s&line=%(line)s#L-%(line)s"
        )
    },
}


# Original Socorro code: https://github.com/mozilla-services/socorro/blob/ff8f5d6b41689e34a6b800577d8ffe383e1e62eb/webapp/crashstats/crashstats/views.py#L141-L153
def get_parsed_dump(report):
    """Return the report's enhanced "json_dump", or {} when absent."""
    # For C++/Rust crashes
    if "json_dump" in report:
        json_dump = report["json_dump"]

        # This is for displaying on the "Details" tab
        enhance_json_dump(json_dump, VCS_MAPPINGS)
        parsed_dump = json_dump
    else:
        parsed_dump = {}

    return parsed_dump


# Original Socorro code: https://github.com/mozilla-services/socorro/blob/ff8f5d6b41689e34a6b800577d8ffe383e1e62eb/webapp/crashstats/crashstats/views.py#L155-L160
def get_crashing_thread(report):
    """Return the index of the report's crashing thread (may be None)."""
    if report["signature"].startswith("shutdownhang"):
        # For shutdownhang signatures, we want to use thread 0 as the crashing thread,
        # because that's the thread that actually contains the useful data about what
        # happened.
        return 0

    return report.get("crashing_thread")


# Original Socorro code: https://github.com/mozilla-services/socorro/blob/ff8f5d6b41689e34a6b800577d8ffe383e1e62eb/webapp/crashstats/crashstats/utils.py#L73-L195
class SignatureStats:
    """Derived statistics for one crash-signature aggregation bucket.

    `signature` is a facet bucket (a dict with "term", "count" and nested
    "facets" aggregations — presumably from Socorro Super Search; confirm
    against the caller). `num_total_crashes` is the denominator used for
    percentage computations. All derived values are memoized via
    `cached_property`, so an instance should be treated as a snapshot.
    """

    def __init__(
        self,
        signature,
        num_total_crashes,
        rank=0,
        platforms=None,
        previous_signature=None,
    ):
        # `previous_signature` is a SignatureStats for an earlier period,
        # consumed by the *_diff / previous_* properties below.
        self.signature = signature
        self.num_total_crashes = num_total_crashes
        self.rank = rank
        self.platforms = platforms
        self.previous_signature = previous_signature

    @cached_property
    def platform_codes(self):
        # Short platform codes, excluding the "unknown" placeholder.
        # (Codes are matched by their first 3 characters below.)
        return [x["short_name"] for x in self.platforms if x["short_name"] != "unknown"]

    @cached_property
    def signature_term(self):
        # The signature string itself (the facet bucket's term).
        return self.signature["term"]

    @cached_property
    def percent_of_total_crashes(self):
        return 100.0 * self.signature["count"] / self.num_total_crashes

    @cached_property
    def num_crashes(self):
        return self.signature["count"]

    @cached_property
    def num_crashes_per_platform(self):
        # Maps "<code>_count" -> crash count, defaulting every known
        # platform to 0 so all keys are always present.
        num_crashes_per_platform = {
            platform + "_count": 0 for platform in self.platform_codes
        }
        for platform in self.signature["facets"]["platform"]:
            code = platform["term"][:3].lower()
            if code in self.platform_codes:
                num_crashes_per_platform[code + "_count"] = platform["count"]
        return num_crashes_per_platform

    @cached_property
    def num_crashes_in_garbage_collection(self):
        # "t" buckets of the is_garbage_collecting facet count GC crashes.
        num_crashes_in_garbage_collection = 0
        for row in self.signature["facets"]["is_garbage_collecting"]:
            if row["term"].lower() == "t":
                num_crashes_in_garbage_collection = row["count"]
        return num_crashes_in_garbage_collection

    @cached_property
    def num_installs(self):
        return self.signature["facets"]["cardinality_install_time"]["value"]

    @cached_property
    def percent_of_total_crashes_diff(self):
        # Returns a float delta, or the string "new" when there is no
        # previous period to compare against.
        if self.previous_signature:
            # The number should go "up" when moving towards 100 and "down" when moving
            # towards 0
            return (
                self.percent_of_total_crashes
                - self.previous_signature.percent_of_total_crashes
            )
        return "new"

    @cached_property
    def rank_diff(self):
        if self.previous_signature:
            # The number should go "up" when moving towards 1 and "down" when moving
            # towards infinity
            return self.previous_signature.rank - self.rank
        return 0

    @cached_property
    def previous_percent_of_total_crashes(self):
        if self.previous_signature:
            return self.previous_signature.percent_of_total_crashes
        return 0

    @cached_property
    def num_startup_crashes(self):
        # startup_crash facet terms may be boolean-ish "T" or numeric "1".
        return sum(
            row["count"]
            for row in self.signature["facets"]["startup_crash"]
            if row["term"] in ("T", "1")
        )

    @cached_property
    def is_startup_crash(self):
        # True only when *every* crash happened during startup.
        return self.num_startup_crashes == self.num_crashes

    @cached_property
    def is_potential_startup_crash(self):
        # Some, but not all, crashes happened during startup.
        return (
            self.num_startup_crashes > 0 and self.num_startup_crashes < self.num_crashes
        )

    @cached_property
    def is_startup_window_crash(self):
        # True when more than half the crashes occurred within the first
        # minute of uptime. Note: each qualifying bucket overwrites the
        # result, so the last bucket with term < 60 decides.
        is_startup_window_crash = False
        for row in self.signature["facets"]["histogram_uptime"]:
            # Aggregation buckets use the lowest value of the bucket as
            # term. So for everything between 0 and 60 excluded, the
            # term will be `0`.
            if row["term"] < 60:
                ratio = 1.0 * row["count"] / self.num_crashes
                is_startup_window_crash = ratio > 0.5
        return is_startup_window_crash

    @cached_property
    def is_plugin_crash(self):
        for row in self.signature["facets"]["process_type"]:
            if row["term"].lower() == "plugin":
                return row["count"] > 0
        return False

    @cached_property
    def is_startup_related_crash(self):
        # Umbrella flag: any of the three startup heuristics above.
        return (
            self.is_startup_crash
            or self.is_potential_startup_crash
            or self.is_startup_window_crash
        )
diff --git a/bugbot/rules/file_crash_bug.py b/bugbot/rules/file_crash_bug.py
new file mode 100644
index 00000000..af90db17
--- /dev/null
+++ b/bugbot/rules/file_crash_bug.py
@@ -0,0 +1,160 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0.
# If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.

import pprint

import jinja2
import requests

from bugbot import logger
from bugbot.bzcleaner import BzCleaner
from bugbot.crash import socorro_util
from bugbot.crash.analyzer import DevBugzilla, SignaturesDataFetcher


class FileCrashBug(BzCleaner):
    """File bugs for new actionable crashes."""

    # NOTE: If you make changes that affect the output of the rule, you should
    # increment this number. This is needed in the experimental phase only.
    VERSION = 1
    # Titles longer than this are truncated with an ellipsis in get_bugs().
    MAX_BUG_TITLE_LENGTH = 255

    def __init__(self):
        super().__init__()

        # Jinja template used to render each filed bug's description.
        self.bug_description_template = jinja2.Environment(
            loader=jinja2.FileSystemLoader("templates")
        ).get_template("file_crash_bug_description.md.jinja")

    def description(self):
        # Human-readable rule description (used in BzCleaner output).
        return "New actionable crashes"

    def columns(self):
        # Column order must match the tuple unpacking in
        # templates/file_crash_bug.html.
        return ["component", "id", "summary"]

    def get_bz_params(self, date):
        return {
            "resolution": ["---", "FIXED"],
            "keywords": ["feature", "regression"],
            "keywords_type": "allwords",
        }

    def get_bugs(self, date):
        """File one bug per new actionable crash signature.

        Returns a dict keyed by bug id with the fields listed in columns().
        """
        self.query_url = None
        bugs = {}

        signatures = SignaturesDataFetcher.find_new_actionable_crashes(
            "Firefox", "nightly"
        )

        for signature in signatures.analyze():
            logger.debug("Generating bug for signature: %s", signature.signature_term)

            title = (
                f"Startup crash in [@ {signature.signature_term}]"
                if signature.is_startup_related_crash
                else f"Crash in [@ {signature.signature_term}]"
            )
            if len(title) > self.MAX_BUG_TITLE_LENGTH:
                # Truncate and ellipsize to stay within the title limit.
                title = title[: self.MAX_BUG_TITLE_LENGTH - 3] + "..."

            # TODO: Handle cases where the regressor is a security bug. In such
            # cases, we may want to file the bug as security bug.

            flags = None
            if signature.regressed_by:
                # TODO: check user activity and if the ni? is open
                flags = [
                    {
                        "name": "needinfo",
                        "requestee": signature.regressed_by_author["name"],
                        "status": "?",
                        "new": "true",
                    }
                ]

            report = signature.fetch_representative_processed_crash()
            description = self.bug_description_template.render(
                {
                    **socorro_util.generate_bug_description_data(report),
                    "signature": signature,
                    "needinfo_regression_author": bool(flags),
                }
            )

            # TODO: Provide the following information:
            # [X] Crash signature
            # [X] Top 10 frames of crashing thread
            # [X] Component
            # [X] The kind of crash
            # [ ] Regression window
            # [X] Inducing patch
            # [X] Reason
            # [X] Regressed by
            # [X] Platform
            # [ ] Firefox status flags
            # [ ] Severity
            # [ ] Time correlation
            # [X] User comments
            # [ ] Crash address commonalities
            # [ ] Estimated future crash volume

            bug_data = {
                "blocks": "bugbot-auto-crash",
                "type": "defect",
                "keywords": ["crash"],
                "status_whiteboard": f"[bugbot-crash-v{self.VERSION}]",
                "summary": title,
                "product": signature.crash_component.product,
                "component": signature.crash_component.name,
                "op_sys": signature.bugzilla_op_sys,
                "rep_platform": signature.bugzilla_cpu_arch,
                "cf_crash_signature": f"[@ {signature.signature_term}]",
                "description": description,
                # TODO: Uncomment the following lines when we move to file on
                # the production instance of Bugzilla. Filling `regressed_by` or
                # `flags` on bugzilla-dev will cause "bug does not exist" errors.
                # "regressed_by": signature.regressed_by,
                # "flags": flags,
            }

            if self.dryrun:
                # Dry-run: log the payload and fabricate a sequential id.
                logger.info("Dry-run bug:")
                pprint.pprint(bug_data)
                bug_id = str(len(bugs) + 1)
            else:
                # NOTE: When moving to production:
                #   - Use Bugzilla instead of DevBugzilla
                #   - Drop the DevBugzilla class
                #   - Update the bug URL `file_crash_bug.html`
                #   - Drop the bug link `file_crash_bug_description.md.jinja`
                #   - Fill the `regressed_by` and `flags` fields
                #   - Create the bug using `utils.create_bug`
                resp = requests.post(
                    url=DevBugzilla.API_URL,
                    json=bug_data,
                    headers=DevBugzilla([]).get_header(),
                    verify=True,
                    timeout=DevBugzilla.TIMEOUT,
                )
                resp.raise_for_status()
                bug = resp.json()
                bug_id = str(bug["id"])
                # TODO: log the created bugs info somewhere (e.g., DB,
                # spreadsheet, or LabelStudio)

            bugs[bug_id] = {
                "id": bug_id,
                "summary": title,
                "component": signature.crash_component,
            }

        logger.debug("Total of %d bugs have been filed", len(bugs))

        return bugs


if __name__ == "__main__":
    FileCrashBug().run()
diff --git a/scripts/cron_run_hourly.sh b/scripts/cron_run_hourly.sh
index 2373235a..7e83d380 100755
--- a/scripts/cron_run_hourly.sh
+++ b/scripts/cron_run_hourly.sh
@@ -73,4 +73,7 @@ python -m bugbot.rules.multifix_regression --production
# Copy metadata from duplicates
python -m bugbot.rules.duplicate_copy_metadata --production
+# File bugs for new actionable crashes
+python -m bugbot.rules.file_crash_bug --production
+
source ./scripts/cron_common_end.sh
diff --git a/templates/file_crash_bug.html b/templates/file_crash_bug.html
new file mode 100644
index 00000000..22fe2c34
--- /dev/null
+++ b/templates/file_crash_bug.html
@@ -0,0 +1,21 @@

<p>BugBot filed the following crash {{ plural('bug', data) }}:</p>

<table>
  <thead>
    <tr>
      <th>Component</th>
      <th>Bug</th>
      <th>Summary</th>
    </tr>
  </thead>
  <tbody>
    {% for i, (comp, bugid, summary) in enumerate(data) -%}
    <tr>
      <td>{{ comp | e }}</td>
      <td>
        <a href="https://bugzilla-dev.allizom.org/show_bug.cgi?id={{ bugid }}">{{ bugid }}</a>
      </td>
      <td>{{ summary | e }}</td>
    </tr>
    {% endfor -%}
  </tbody>
</table>
diff --git a/templates/file_crash_bug_description.md.jinja b/templates/file_crash_bug_description.md.jinja
new file mode 100644
index 00000000..eaf6e5c8
--- /dev/null
+++ b/templates/file_crash_bug_description.md.jinja
@@ -0,0 +1,58 @@
{#
  Part of this template was extracted from Socorro's repository:
  https://github.com/mozilla-services/socorro/blob/ff8f5d6b41689e34a6b800577d8ffe383e1e62eb/webapp/crashstats/crashstats/jinja2/crashstats/bug_comment.txt
  https://github.com/mozilla-services/socorro/blob/ff8f5d6b41689e34a6b800577d8ffe383e1e62eb/webapp/crashstats/signature/jinja2/signature/signature_summary.html#L7-L41
#}
Crash report: https://crash-stats.mozilla.org/report/index/{{ uuid }}

{% if java_stack_trace %}
Java stack trace:
```
{{ java_stack_trace|truncate(5000, True)|safe }}
```
{% elif frames %}
{% if moz_crash_reason %}
MOZ_CRASH Reason: ```{{ moz_crash_reason|safe }}```
{% elif reason %}
Reason: ```{{ reason|safe }}```
{% endif %}
{% if crashing_thread is none %}
No crashing thread identified; using thread 0.
+{% endif %} +Top {{ frames|length }} frames of crashing thread: +``` +{% for frame in frames -%} +{{ frame.frame|safe}} {{ frame.module|safe }} {{ frame.signature|safe }} {{ frame.source|safe }} +{% endfor -%} +``` +{% endif %} + + +Here are some insights about the crash signature based on recent data at the time of reporting the bug: + +- **Crash kind:** {{ "Plugin Crash" if signature.is_plugin_crash else "Browser Crash" }} +- **Is startup crash:** {{ "Yes - " if signature.is_startup_related_crash else "No" }} +{%- if signature.is_startup_crash -%} +all crashes happened during startup +{%- elif signature.is_potential_startup_crash -%} +{{ signature.num_startup_crashes }} out of {{ signature.num_crashes }} crashes happened during startup +{%- elif signature.is_startup_window_crash -%} +more than half of the crashes happened during the first minute after launch +{%- endif %} +- **Has user comments:** {% if signature.has_user_comments -%} +[Yes]({{ signature.user_comments_page_url }}) +{%- else -%} +No +{%- endif %} + + +{% if signature.regressed_by %} +By analyzing the backtrace, the regression may have been introduced by a {{ "patch [1]" if signature.regressed_by_patch else "patch"}} to fix [Bug {{ signature.regressed_by }}](https://bugzilla.mozilla.org/show_bug.cgi?id={{ signature.regressed_by }}). +{% if signature.regressed_by_patch %} +[1] https://hg.mozilla.org/mozilla-central/rev?node={{ signature.regressed_by_patch }} +{%- endif %} +{% endif %} + +{% if needinfo_regression_author %} +:{{ signature.regressed_by_author["nick"] }}, since you are the author of the potential regressor, could you please take a look? +{% endif %}