# gecko-dev/dom/quota/scripts/stackanalysis.py
# (page-extraction metadata: 397 lines, 13 KiB, Python)

# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
# There seem to be sometimes identical events recorded twice by telemetry
def sanitize(rows):
    """Drop rows that repeat the (client_id, session_id, seq) of their
    immediate predecessor, keeping only the first occurrence."""
    deduped = []
    previous_key = ("unset", "unset", "unset")
    for row in rows:
        key = (row["client_id"], row["session_id"], row["seq"])
        if key != previous_key:
            deduped.append(row)
        # Track the last seen key unconditionally so only *consecutive*
        # duplicates are removed.
        previous_key = key
    return deduped
# Given a set of rows, find all distinct build ids
def extractBuildIDs(rows):
    """Return a dict mapping each distinct row["build_id"] to the number of
    rows carrying it."""
    buildids = {}
    for row in rows:
        build_id = row["build_id"]
        # dict.get collapses the membership test + increment into one step
        # and avoids shadowing the builtin ``id`` as the old code did.
        buildids[build_id] = buildids.get(build_id, 0) + 1
    return buildids
# Given a set of build ids and rows, enrich each row by an hg link.
# Relies on the result of utils.fetchBuildRevisions in buildids.
def constructHGLinks(buildids, rows):
    """Set row["location"] to "<prefix>/<source_file>#l<source_line>" for
    every row, where <prefix> is the hg revision URL from ``buildids`` or,
    when the build id is unknown, the raw build id itself.

    Mutates ``rows`` in place; returns None.
    """
    for row in rows:
        build_id = row["build_id"]
        # The two branches of the old code differed only in the prefix;
        # dict.get with the id as fallback expresses that in one line.
        prefix = buildids.get(build_id, build_id)
        # NOTE: source_line is concatenated, so it is expected to be a str.
        row["location"] = prefix + "/" + row["source_file"] + "#l" + row["source_line"]
# Module-level state: the set of (location, result) pairs considered to start
# a new raw stack, and the per-frame-pair timing statistics.
topmost_stackframes = set()
delta_frames = {}


def isTopmostFrame(frame):
    """Return True if the frame's (location, result) pair has been
    registered as a topmost frame."""
    return (frame["location"], frame["result"]) in topmost_stackframes


def addTopmostFrame(frame):
    """Register the frame's (location, result) pair as topmost.

    Only the first frame to introduce a pair is marked with
    frame["topmost"] = True; later frames with the same pair are untouched.
    """
    if not isTopmostFrame(frame):
        # print("Found new topmost frame {}.".format(frame))
        key = (frame["location"], frame["result"])
        topmost_stackframes.add(key)
        frame["topmost"] = True
def addFrameDelta(frame1, frame2):
    """Accumulate the event-timestamp distance between two consecutive
    frames of the same client/session into ``delta_frames``.

    ``frame2`` is the candidate for becoming a topmost frame should the
    average distance turn out to be too large.
    """
    # Time deltas are only meaningful within a single client's session.
    if (frame1["client_id"] != frame2["client_id"]
            or frame1["session_id"] != frame2["session_id"]):
        return

    fkey = "{}:{}-{}:{}".format(
        frame2["location"], frame2["result"], frame1["location"], frame1["result"]
    )
    fdelta = delta_frames.setdefault(
        fkey,
        {"delta_sum": 0, "delta_cnt": 0, "prev_row": frame1, "candidate": frame2},
    )

    timestamp1 = frame1["event_timestamp"]
    timestamp2 = frame2["event_timestamp"]
    # Only well-ordered integer timestamps contribute to the average.
    if isinstance(timestamp1, int) and isinstance(timestamp2, int) and timestamp2 > timestamp1:
        fdelta["delta_sum"] += timestamp2 - timestamp1
        fdelta["delta_cnt"] += 1
# There can be outliers in terms of time distance between two stack frames
# that belong to the same propagation stack. In order to not increase the
# risk that one outlier breaks thousands of stacks, we check for the average
# time distance.
def checkAverageFrameTimeDeltas(rows, max_delta):
    """Promote candidate frames to topmost frames when the *average* time
    distance to their predecessor exceeds ``max_delta``.

    Walks consecutive row pairs (restarting after topmost frames and rows
    from incomplete sessions), feeds them into addFrameDelta, then scans
    the accumulated ``delta_frames`` statistics.
    """
    # print("checkAverageFrameTimeDeltas")
    prev_row = None
    for row in rows:
        # A topmost frame or an incomplete session breaks the chain of
        # consecutive frames.
        if "topmost" in row or not row["session_complete"]:
            prev_row = None
            continue
        if prev_row:
            addFrameDelta(prev_row, row)
        prev_row = row

    # Iterate the values directly instead of key-indexing twice per entry;
    # also renamed the old local ``sum``, which shadowed the builtin.
    for fdelta in delta_frames.values():
        delta_sum = fdelta["delta_sum"]
        delta_cnt = fdelta["delta_cnt"]
        if delta_cnt > 0 and (delta_sum / delta_cnt) > max_delta:
            # print(fdelta)
            addTopmostFrame(fdelta["candidate"])
# A topmost frame is considered to initiate a new raw stack. We collect all
# candidates before we actually apply them. This implies, that we should run
# this function on a "large enough" sample of rows to be more accurate.
# As a side effect, we mark all rows that are part of a "complete" session
# (a session, that started within our data scope).
def collectTopmostFrames(rows):
    """Scan the ordered rows and register topmost frames via addTopmostFrame.

    Side effects: stamps row["session_complete"] on every row, may set
    row["topmost"] (through addTopmostFrame), and finally runs
    checkAverageFrameTimeDeltas over all rows.
    Assumes rows are ordered by client/session/thread/sequence — TODO confirm
    against the query that produces them.
    """
    prev_cid = "unset"
    prev_sid = "unset"
    prev_tid = "unset"
    prev_ctx = "unset"
    prev_sev = "ERROR"
    session_complete = False
    after_severity_downgrade = False
    for row in rows:
        cid = row["client_id"]
        sid = row["session_id"]
        # row["seq"] packs the thread id into the upper 32 bits and the
        # per-thread sequence number into the lower 32 bits.
        tid = row["seq"] >> 32  # thread_id
        ctx = row["context"]
        seq = row["seq"] & 0x00000000FFFFFFFF  # seq
        sev = row["severity"]

        # If we have a new session, ensure it is complete from start,
        # otherwise we will ignore it entirely.
        if cid != prev_cid or sid != prev_sid or tid != prev_tid:
            if seq == 1:
                session_complete = True
            else:
                session_complete = False
        row["session_complete"] = session_complete

        if session_complete:
            # If we change client, session, thread or context, we can be sure to have
            # a new topmost frame.
            if (
                seq == 1
                or cid != prev_cid
                or sid != prev_sid
                or tid != prev_tid
                or ctx != prev_ctx
            ):
                addTopmostFrame(row)
                after_severity_downgrade = False
            # We do not expect a non-error to be ever upgraded to an error
            elif sev == "ERROR" and prev_sev != "ERROR":
                addTopmostFrame(row)
                after_severity_downgrade = False
            # If we just had a severity downgrade, we assume that we wanted
            # to break the error propagation after this point and split, too
            elif after_severity_downgrade:
                addTopmostFrame(row)
                after_severity_downgrade = False
            elif prev_sev == "ERROR" and sev != "ERROR":
                after_severity_downgrade = True

        prev_cid = cid
        prev_sid = sid
        prev_tid = tid
        prev_ctx = ctx
        prev_sev = sev

    # Should be ms. We've seen quite some runtime between stackframes in the
    # wild. We might want to consider to make this configurable. In general
    # we prefer local context over letting slip through some topmost frame
    # unrecognized, assuming that fixing the issues one by one they will
    # uncover them succesively. This is achieved by a rather high delta value.
    max_avg_delta = 200
    checkAverageFrameTimeDeltas(rows, max_avg_delta)
def getFrameKey(frame):
    """Serialize the identifying parts (location, result) of one frame."""
    return f"{frame['location']}.{frame['result']}|"
def getStackKey(stack):
    """Hash the concatenated frame keys of a stack into one comparable key."""
    # Frame-key format inlined from getFrameKey; joined at C speed instead
    # of string += in a loop.
    parts = ["{}.{}|".format(f["location"], f["result"]) for f in stack["frames"]]
    return hash("".join(parts))
# A "raw stack" is a list of frames, that:
# - share the same build_id (implicitely through location)
# - share the same client_id
# - share the same session_id
# - has a growing sequence number
# - stops at the first downgrade of severity from ERROR to else
# - XXX: contains each location at most once (no recursion)
# - appears to be in a reasonable short timeframe
# Calculates also a hash key to identify identical stacks
def collectRawStacks(rows):
    """Cut the ordered rows into raw stacks, starting a new stack at every
    registered topmost frame. Returns the list of stack dicts."""
    collectTopmostFrames(rows)
    raw_stacks = []
    # Placeholder stack that absorbs rows seen before the first topmost frame.
    stack = {
        "stack_id": "unset",
        "client_id": "unset",
        "session_id": "unset",
        "submit_timeabs": "unset",
        "frames": [{"location": "unset"}],
    }
    stack_id = 1
    first = True
    for row in rows:
        if isTopmostFrame(row):
            # Flush the stack collected so far before starting a new one.
            # NOTE(review): if the very first row is NOT topmost, the
            # placeholder stack (with its "unset" frame lacking a "result")
            # would be flushed here and getStackKey would raise — presumably
            # rows always start with a topmost frame; verify.
            if not first:
                stack["stack_key"] = getStackKey(stack)
                raw_stacks.append(stack)
                stack_id += 1

            stack = {
                "stack_id": stack_id,
                "client_id": row["client_id"],
                "session_id": row["session_id"],
                "submit_timeabs": row["submit_timeabs"],
                "context": row["context"],
                "frames": [],
            }

        stack["frames"].append(
            {
                "location": row["location"],
                "source_file": row["source_file"],
                "source_line": row["source_line"],
                "seq": row["seq"],
                "severity": row["severity"],
                "result": row["result"],
            }
        )
        first = False

    # NOTE(review): the stack being built when the loop ends is never
    # appended, so the last raw stack of the input is dropped — confirm
    # whether this is intended (e.g. because the tail may be incomplete).
    return raw_stacks
# Merge all stacks that have the same hash key and count occurrences.
# Relies on the ordering per client_id/session_id for correct counting.
def mergeEqualStacks(raw_stacks):
    """Collapse stacks sharing a stack_key into one record, counting hits,
    distinct clients and distinct sessions; return the records sorted by
    hit_count, descending. Mutates the first stack of each group in place."""
    merged_stacks = {}
    last_client_id = "none"
    last_session_id = "none"
    for stack in raw_stacks:
        key = stack["stack_key"]
        existing = merged_stacks.get(key)
        if existing is None:
            # First occurrence: the stack itself becomes the merged record.
            stack["client_count"] = 1
            last_client_id = stack["client_id"]
            stack["session_count"] = 1
            last_session_id = stack["session_id"]
            stack["hit_count"] = 1
            merged_stacks[key] = stack
        else:
            # Counting relies on the input being grouped by client/session.
            if stack["client_id"] != last_client_id:
                last_client_id = stack["client_id"]
                existing["client_count"] += 1
            if stack["session_id"] != last_session_id:
                last_session_id = stack["session_id"]
                existing["session_count"] += 1
            existing["hit_count"] += 1
    return sorted(merged_stacks.values(), key=lambda s: s["hit_count"], reverse=True)
# Split the list of stacks into:
# - aborted (has at least one frame with NS_ERROR_ABORT)
# - info/warning (has at least one frame with that severity)
# - error (has only error frames)
def filterStacksForPropagation(
    all_stacks, error_stacks, warn_stacks, info_stacks, abort_stacks
):
    """Distribute each stack into exactly one of the output lists, checked
    in priority order: abort, then info, then warning, else error."""
    for stack in all_stacks:
        frames = stack["frames"]
        if any(f["result"] == "NS_ERROR_ABORT" for f in frames):
            abort_stacks.append(stack)
        elif any(f["severity"] == "INFO" for f in frames):
            info_stacks.append(stack)
        elif any(f["severity"] == "WARNING" for f in frames):
            warn_stacks.append(stack)
        else:
            error_stacks.append(stack)
# Bugzilla comment markup
def printStacks(stacks):
    """Render the stacks as a Bugzilla markup table and return it as one
    string (header, separator, then one row per stack)."""
    row_format = "{} | {} | {} | {} | {}\n"
    out = row_format.format("Clients", "Sessions", "Hits", "Anchor (Context)", "Stack")
    out += row_format.format("-------", "--------", "----", "----------------", "-----")
    for stack in stacks:
        # One markup link per frame, joined from topmost frame downwards.
        framestr = " <- ".join(
            "[{}#{}:{}]({})".format(
                frame["source_file"],
                frame["source_line"],
                frame["result"],
                frame["location"],
            )
            for frame in stack["frames"]
        )
        out += row_format.format(
            stack["client_count"],
            stack["session_count"],
            stack["hit_count"],
            "{} ({})".format(stack["frames"][0]["anchor"], stack["context"]),
            framestr,
        )
    return out
def groupStacksForAnchors(stacks):
    """Group stacks by the anchor of their first (topmost) frame.

    Returns {anchor_name: {"anchor": anchor_name, "stacks": [stack, ...]}}.
    """
    anchors = {}
    for stack in stacks:
        name = stack["frames"][0]["anchor"]
        # setdefault creates the group record on first sight of an anchor.
        entry = anchors.setdefault(name, {"anchor": name, "stacks": []})
        entry["stacks"].append(stack)
    return anchors
"""
def getSummaryForAnchor(anchor):
return "[QM_TRY] Errors in function {}".format(anchor)
def searchBugForAnchor(bugzilla_key, anchor):
summary = getSummaryForAnchor(anchor)
bug_url = "https://bugzilla.mozilla.org/rest/bug?" \
"summary={}&api_key={}".format(summary, bugzilla_key)
return requests.get(url=bug_url).json()["bugs"]
def createBugForAnchor(bugzilla_key, anchor):
summary = getSummaryForAnchor(anchor)
bug_url = "https://bugzilla.mozilla.org/rest/bug?" \
"Bugzilla_api_key={}".format(bugzilla_key)
body = {
"product" : "Core",
"component" : "Storage: Quota Manager",
"version" : "unspecified",
"summary" : summary,
"description" : "This bug collects errors reported by QM_TRY"
"macros for function {}.".format(anchor),
}
resp = requests.post(url=bug_url, json=body)
if resp.status_code != 200:
print(resp)
return 0
id = resp.json()["id"]
print("Added new bug {}:".format(id))
return id
def ensureBugForAnchor(bugzilla_key, anchor):
buglist = searchBugForAnchor(bugzilla_key, anchor)
if (len(buglist) > 0):
id = buglist[0]["id"]
print("Found existing bug {}:".format(id))
return id
return createBugForAnchor(bugzilla_key, anchor)
def addCommentForAnchor(bugzilla_key, anchor, stacks):
id = ensureBugForAnchor(bugzilla_key, anchor)
if (id <= 0):
print("Unable to create a bug for {}.".format(anchor))
return
comment = printStacks(stacks)
print("")
print("Add comment to bug {}:".format(id))
print(comment)
def addCommentsForStacks(bugzilla_key, stacks):
anchors = groupStacksForAnchors(stacks)
for anchor in anchors:
addCommentForAnchor(bugzilla_key, anchors[anchor]["anchor"], anchors[anchor]["stacks"])
"""