Bug 1582898 - Split up the callgraph generation step r=jonco

Differential Revision: https://phabricator.services.mozilla.com/D46688
This commit is contained in:
Steve Fink 2021-10-18 20:34:39 +00:00
Родитель c69cbcabfe
Коммит 01d04a3348
9 изменённых файлов: 543 добавлений и 273 удалений

Просмотреть файл

@ -14,23 +14,25 @@ import argparse
import os
import subprocess
import sys
import re
try:
from shlex import quote
except ImportError:
from pipes import quote
# Python 2/3 version independence polyfills
anystring_t = str if sys.version_info[0] > 2 else basestring
def execfile(thefile, globals):
exec(compile(open(thefile).read(), filename=thefile, mode="exec"), globals)
try:
execfile
except Exception:
def execfile(thefile, globals):
exec(compile(open(thefile).read(), filename=thefile, mode="exec"), globals)
class Output(str):
    """A string labelled as an output.

    Behaves exactly like ``str``; the subclass exists only so that code can
    recognise these fragments with ``isinstance``.
    """
class MultiInput(str):
    """A string labelled as a pattern for multiple inputs.

    Behaves exactly like ``str``; the subclass exists only so that code can
    recognise these fragments with ``isinstance``.
    """
def env(config):
@ -45,19 +47,28 @@ def env(config):
def fill(command, config):
try:
return tuple(s % config for s in command)
except Exception:
print("Substitution failed:")
problems = []
for fragment in command:
try:
fragment % config
except Exception:
problems.append(fragment)
raise Exception(
"\n".join(["Substitution failed:"] + [" %s" % s for s in problems])
)
filled = []
for s in command:
try:
rep = s.format(**config)
except KeyError:
print("Substitution failed: %s" % s)
filled = None
break
if isinstance(s, Output):
filled.append(Output(rep))
elif isinstance(s, MultiInput):
N = int(config["jobs"])
for i in range(1, N + 1):
filled.append(rep.format(i=i, n=N))
else:
filled.append(rep)
if filled is None:
raise Exception("substitution failure")
return tuple(filled)
def print_command(command, outfile=None, env=None):
@ -85,173 +96,216 @@ def print_command(command, outfile=None, env=None):
print(output)
JOBS = {
'dbs': {
'command': [
'{analysis_scriptdir}/run_complete',
'--foreground',
'--no-logs',
'--build-root={objdir}',
'--wrap-dir={sixgill}/scripts/wrap_gcc',
'--work-dir=work',
'-b', '{sixgill_bin}',
'--buildcommand={buildcommand}',
'.'
],
'outputs': []
},
def generate_hazards(config, outfilename):
jobs = []
for i in range(int(config["jobs"])):
command = fill(
(
"%(js)s",
"%(analysis_scriptdir)s/analyzeRoots.js",
"%(gcFunctions_list)s",
"%(gcEdges)s",
"%(limitedFunctions_list)s",
"%(gcTypes)s",
"%(typeInfo)s",
str(i + 1),
"%(jobs)s",
"tmp.%s" % (i + 1,),
),
config,
)
outfile = "rootingHazards.%s" % (i + 1,)
output = open(outfile, "w")
if config["verbose"]:
print_command(command, outfile=outfile, env=env(config))
jobs.append((command, Popen(command, stdout=output, env=env(config))))
'list-dbs': {
'command': ['ls', '-l']
},
'rawcalls': {
'command': [
'{js}',
'{analysis_scriptdir}/computeCallgraph.js',
'{typeInfo}',
Output('rawcalls'),
'{i}', '{n}'
],
'multi-output': True,
'outputs': ['rawcalls.{i}.of.{n}']
},
'gcFunctions': {
'command': [
'{js}', '{analysis_scriptdir}/computeGCFunctions.js', MultiInput('{rawcalls}'),
'--outputs',
Output('callgraph'),
Output('gcFunctions'),
Output('gcFunctions_list'),
Output('gcEdges'),
Output('limitedFunctions_list')
],
'outputs': [
'callgraph.txt',
'gcFunctions.txt',
'gcFunctions.lst',
'gcEdges.txt',
'limitedFunctions.lst'
],
},
'gcTypes': {
'command': [
'{js}', '{analysis_scriptdir}/computeGCTypes.js',
Output('gcTypes'), Output('typeInfo')
],
'outputs': ['gcTypes.txt', 'typeInfo.txt']
},
'allFunctions': {
'command': ['{sixgill_bin}/xdbkeys', 'src_body.xdb'],
'redirect-output': 'allFunctions.txt'
},
'hazards': {
'command': [
'{js}',
'{analysis_scriptdir}/analyzeRoots.js',
'{gcFunctions_list}',
'{gcEdges}',
'{limitedFunctions_list}',
'{gcTypes}',
'{typeInfo}',
'{i}', '{n}',
'tmp.{i}.of.{n}'
],
'multi-output': True,
'redirect-output': 'rootingHazards.{i}.of.{n}'
},
'gather-hazards': {
'command': ['cat', MultiInput('{hazards}')],
'redirect-output': 'rootingHazards.txt'
},
'explain': {
'command': [
sys.executable,
'{analysis_scriptdir}/explain.py',
'{gather-hazards}',
'{gcFunctions}',
Output('explained_hazards'), Output('unnecessary'), Output('refs')
],
'outputs': ['hazards.txt', 'unnecessary.txt', 'refs.txt']
},
'heapwrites': {
'command': ['{js}', '{analysis_scriptdir}/analyzeHeapWrites.js'],
'redirect-output': 'heapWriteHazards.txt'
}
}
def out_indexes(command):
    """Yield an ``(i, j, item)`` tuple for every Output fragment in `command`.

    - ``i`` counts the yielded tuples, starting at 0 (a la enumerate()).
    - ``j`` is the position of the fragment within `command`.
    - ``item`` is ``command[j]`` itself.

    Fragments that are not Output instances are skipped.
    """
    outputs = (
        (position, piece)
        for position, piece in enumerate(command)
        if isinstance(piece, Output)
    )
    for ordinal, (position, piece) in enumerate(outputs):
        yield (ordinal, position, piece)
def run_job(name, config):
job = JOBS[name]
outs = job.get("outputs") or job.get("redirect-output")
print("Running " + name + " to generate " + str(outs))
if "function" in job:
job["function"](config, job["redirect-output"])
return
N = int(config["jobs"]) if job.get("multi-output") else 1
config["n"] = N
jobs = {}
for i in range(1, N + 1):
config["i"] = i
cmd = fill(job["command"], config)
info = spawn_command(cmd, job, name, config)
jobs[info["proc"].pid] = info
final_status = 0
while jobs:
pid, status = os.wait()
jobs = [job for job in jobs if job[1].pid != pid]
final_status = final_status or status
info = jobs[pid]
del jobs[pid]
if "redirect" in info:
info["redirect"].close()
if final_status:
raise subprocess.CalledProcessError(final_status, "analyzeRoots.js")
with open(outfilename, "w") as output:
command = ["cat"] + [
"rootingHazards.%s" % (i + 1,) for i in range(int(config["jobs"]))
]
if config["verbose"]:
print_command(command, outfile=outfilename)
subprocess.call(command, stdout=output)
JOBS = {
"dbs": (
(
"%(analysis_scriptdir)s/run_complete",
"--foreground",
"--no-logs",
"--build-root=%(objdir)s",
"--wrap-dir=%(sixgill)s/scripts/wrap_gcc",
"--work-dir=work",
"-b",
"%(sixgill_bin)s",
"--buildcommand=%(buildcommand)s",
".",
),
(),
),
"list-dbs": (("ls", "-l"), ()),
"callgraph": (
(
"%(js)s",
"%(analysis_scriptdir)s/computeCallgraph.js",
"%(typeInfo)s",
"[callgraph]",
),
("callgraph.txt",),
),
"gcFunctions": (
(
"%(js)s",
"%(analysis_scriptdir)s/computeGCFunctions.js",
"%(callgraph)s",
"[gcFunctions]",
"[gcFunctions_list]",
"[gcEdges]",
"[limitedFunctions_list]",
),
("gcFunctions.txt", "gcFunctions.lst", "gcEdges.txt", "limitedFunctions.lst"),
),
"gcTypes": (
(
"%(js)s",
"%(analysis_scriptdir)s/computeGCTypes.js",
"[gcTypes]",
"[typeInfo]",
),
("gcTypes.txt", "typeInfo.txt"),
),
"allFunctions": (
(
"%(sixgill_bin)s/xdbkeys",
"src_body.xdb",
),
"allFunctions.txt",
),
"hazards": (generate_hazards, "rootingHazards.txt"),
"explain": (
(
sys.executable,
"%(analysis_scriptdir)s/explain.py",
"%(hazards)s",
"%(gcFunctions)s",
"[explained_hazards]",
"[unnecessary]",
"[refs]",
),
("hazards.txt", "unnecessary.txt", "refs.txt"),
),
"heapwrites": (
("%(js)s", "%(analysis_scriptdir)s/analyzeHeapWrites.js"),
"heapWriteHazards.txt",
),
}
def out_indexes(command):
    """Yield ``(index, name)`` for each fragment of `command` written as "[name]".

    A fragment qualifies only when the whole string is wrapped in square
    brackets; ``name`` is the text between the brackets (possibly empty).
    """
    for position, fragment in enumerate(command):
        bracketed = re.match(r"^\[(.*)\]$", fragment)
        if not bracketed:
            continue
        yield (position, bracketed.group(1))
def run_job(name, config):
cmdspec, outfiles = JOBS[name]
print("Running " + name + " to generate " + str(outfiles))
if hasattr(cmdspec, "__call__"):
cmdspec(config, outfiles)
else:
temp_map = {}
cmdspec = fill(cmdspec, config)
if isinstance(outfiles, anystring_t):
stdout_filename = "%s.tmp" % name
temp_map[stdout_filename] = outfiles
if config["verbose"]:
print_command(cmdspec, outfile=outfiles, env=env(config))
else:
stdout_filename = None
pc = list(cmdspec)
outfile = 0
for (i, name) in out_indexes(cmdspec):
pc[i] = outfiles[outfile]
outfile += 1
if config["verbose"]:
print_command(pc, env=env(config))
command = list(cmdspec)
outfile = 0
for (i, name) in out_indexes(cmdspec):
command[i] = "%s.tmp" % name
temp_map[command[i]] = outfiles[outfile]
outfile += 1
sys.stdout.flush()
if stdout_filename is None:
subprocess.check_call(command, env=env(config))
else:
with open(stdout_filename, "w") as output:
subprocess.check_call(command, stdout=output, env=env(config))
for (temp, final) in temp_map.items():
# Rename the temporary files to their final names.
for (temp, final) in info["rename_map"].items():
try:
if config["verbose"]:
print("Renaming %s -> %s" % (temp, final))
os.rename(temp, final)
except OSError:
print("Error renaming %s -> %s" % (temp, final))
raise
if final_status != 0:
raise Exception("job {} returned status {}".format(name, final_status))
def spawn_command(cmdspec, job, name, config):
    """Start the process for one (batch of a) job and return its bookkeeping.

    cmdspec: the substituted command; Output fragments mark the arguments
        that name output files.
    job: the job description dict. If it has "redirect-output", the process
        stdout is captured to a file; otherwise its optional "outputs" list
        names the final files for the Output fragments, in order.
    name: the job name, used to build the temporary stdout filename.
    config: configuration dict; reads "verbose" and the optional "i".

    Returns a dict with:
      "rename_map": temporary filename -> final filename,
      "proc": the spawned Popen object,
      "redirect": the open stdout file (only when stdout is redirected);
          it is left open here and must be closed by the caller.

    The process always writes to temporary names; nothing is renamed here.
    """
    rename_map = {}
    # Appended to temporary names; empty when config has no "i" entry.
    suffix = config.get("i", "")

    if "redirect-output" in job:
        stdout_filename = "{}.tmp{}".format(name, suffix)
        final_outfile = job["redirect-output"].format(**config)
        rename_map[stdout_filename] = final_outfile
        command = cmdspec
        if config["verbose"]:
            print_command(cmdspec, outfile=final_outfile, env=env(config))
    else:
        # A job may declare no outputs at all (e.g. one that only has a
        # "command"); treat that as an empty list instead of a KeyError.
        outfiles = fill(job.get("outputs", ()), config)
        stdout_filename = None
        out_positions = list(out_indexes(cmdspec))

        # To print the supposedly-executed command, replace the Outputs in the
        # command with final output file names. (The actual command will be
        # using temporary files that get renamed at the end.)
        if config["verbose"]:
            pc = list(cmdspec)
            for (i, j, _) in out_positions:
                pc[j] = outfiles[i]
            print_command(pc, env=env(config))

        # Replace the Outputs with temporary filenames, and record a mapping
        # from those temp names to their actual final names that will be used
        # if the command succeeds.
        command = list(cmdspec)
        for (i, j, out_name) in out_positions:
            command[j] = "{}.tmp{}".format(out_name, suffix)
            rename_map[command[j]] = outfiles[i]

    sys.stdout.flush()
    info = {"rename_map": rename_map}
    if stdout_filename:
        info["redirect"] = open(stdout_filename, "w")
        try:
            info["proc"] = Popen(command, stdout=info["redirect"], env=env(config))
        except Exception:
            # Do not leak the file handle if the process could not be started.
            info["redirect"].close()
            raise
    else:
        info["proc"] = Popen(command, env=env(config))

    if config["verbose"]:
        print("Spawned process {}".format(info["proc"].pid))

    return info
# Default to conservatively assuming 4GB/job.
def max_parallel_jobs(job_size=4 * 2 ** 30):
    """Return the max number of parallel jobs we can run without overfilling
    memory, assuming heavyweight jobs.

    job_size: assumed memory needed by one job, in bytes.

    The result is the smaller of a CPU-based limit (``nproc --ignore=1``) and
    a memory-based limit (physical memory divided by `job_size`, rounded to
    the nearest integer). It is always an int and never less than 1, so a
    machine with less memory than one job's worth still runs a single job.
    """
    try:
        from_cores = int(subprocess.check_output(["nproc", "--ignore=1"]).strip())
    except (OSError, ValueError, subprocess.CalledProcessError):
        # `nproc` is missing or misbehaving: approximate "all cores but one".
        import multiprocessing

        from_cores = max(1, multiprocessing.cpu_count() - 1)
    mem_bytes = os.sysconf("SC_PAGE_SIZE") * os.sysconf("SC_PHYS_PAGES")
    # float() forces true division and int() an integer result on any Python.
    from_mem = int(round(mem_bytes / float(job_size)))
    return max(1, min(from_cores, from_mem))
config = {"analysis_scriptdir": os.path.dirname(__file__)}
@ -264,7 +318,7 @@ parser = argparse.ArgumentParser(
description="Statically analyze build tree for rooting hazards."
)
parser.add_argument(
"step", metavar="STEP", type=str, nargs="?", help="run starting from this step"
"step", metavar="STEP", type=str, nargs="?", help="run only step STEP"
)
parser.add_argument(
"--source", metavar="SOURCE", type=str, nargs="?", help="source code to analyze"
@ -284,7 +338,14 @@ parser.add_argument(
help="full path to ctypes-capable JS shell",
)
parser.add_argument(
"--upto", metavar="UPTO", type=str, nargs="?", help="last step to execute"
"--first",
metavar="STEP",
type=str,
nargs="?",
help="execute all jobs starting with STEP",
)
parser.add_argument(
"--last", metavar="STEP", type=str, nargs="?", help="stop at step STEP"
)
parser.add_argument(
"--jobs",
@ -350,14 +411,14 @@ if args.tag and not args.buildcommand:
if args.jobs is not None:
data["jobs"] = args.jobs
if not data.get("jobs"):
data["jobs"] = int(subprocess.check_output(["nproc", "--ignore=1"]).strip())
data["jobs"] = max_parallel_jobs()
if args.buildcommand:
data["buildcommand"] = args.buildcommand
elif "BUILD" in os.environ:
data["buildcommand"] = os.environ["BUILD"]
else:
data["buildcommand"] = "make -j4 -s"
data["buildcommand"] = "make -j{} -s".format(data["jobs"])
if "ANALYZED_OBJDIR" in os.environ:
data["objdir"] = os.environ["ANALYZED_OBJDIR"]
@ -370,45 +431,56 @@ if "SOURCE" in os.environ:
steps = [
"dbs",
"gcTypes",
"callgraph",
"rawcalls",
"gcFunctions",
"allFunctions",
"hazards",
"gather-hazards",
"explain",
"heapwrites",
]
if args.list:
for step in steps:
command, outfilename = JOBS[step]
if outfilename:
print("%s -> %s" % (step, outfilename))
job = JOBS[step]
outfiles = job.get("outputs") or job.get("redirect-output")
if outfiles:
print(
"%s\n ->%s %s"
% (step, "*" if job.get("multi-output") else "", outfiles)
)
else:
print(step)
sys.exit(0)
for step in steps:
command, outfiles = JOBS[step]
if isinstance(outfiles, anystring_t):
data[step] = outfiles
else:
outfile = 0
for (i, name) in out_indexes(command):
data[name] = outfiles[outfile]
outfile += 1
job = JOBS[step]
if "redirect-output" in job:
data[step] = job["redirect-output"]
elif "outputs" in job and "command" in job:
outfiles = job["outputs"]
for (i, j, name) in out_indexes(job["command"]):
data[name] = outfiles[i]
num_outputs = len(list(out_indexes(job["command"])))
assert (
len(outfiles) == outfile
), "step '%s': mismatched number of output files (%d) and params (%d)" % (
len(outfiles) == num_outputs
), 'step "%s": mismatched number of output files (%d) and params (%d)' % (
step,
outfile,
num_outputs,
len(outfiles),
) # NOQA: E501
if args.step:
steps = steps[steps.index(args.step) :]
if args.upto:
steps = steps[: steps.index(args.upto) + 1]
if args.first or args.last:
raise Exception(
"--first and --last cannot be used when a step argument is given"
)
steps = [args.step]
else:
if args.first:
steps = steps[steps.index(args.first) :]
if args.last:
steps = steps[: steps.index(args.last) + 1]
for step in steps:
run_job(step, data)

Просмотреть файл

@ -1114,10 +1114,8 @@ xdb.open("src_body.xdb");
var minStream = xdb.min_data_stream()|0;
var maxStream = xdb.max_data_stream()|0;
var N = (maxStream - minStream) + 1;
var start = Math.floor((batch - 1) / numBatches * N) + minStream;
var start_next = Math.floor(batch / numBatches * N) + minStream;
var end = start_next - 1;
var start = batchStart(batch, numBatches, minStream, maxStream);
var end = batchLast(batch, numBatches, minStream, maxStream);
function process(name, json) {
functionName = name;

Просмотреть файл

@ -40,8 +40,8 @@ function indirectCallCannotGC(fullCaller, fullVariable)
// This is usually a simple variable name, but sometimes a full name gets
// passed through. And sometimes that name is truncated. Examples:
// _ZL13gAbortHandler|mozalloc_oom.cpp:void (* gAbortHandler)(size_t)
// _ZL14pMutexUnlockFn|umutex.cpp:void (* pMutexUnlockFn)(const void*
// _ZL13gAbortHandler$mozalloc_oom.cpp:void (* gAbortHandler)(size_t)
// _ZL14pMutexUnlockFn$umutex.cpp:void (* pMutexUnlockFn)(const void*
var name = readable(fullVariable);
if (name in ignoreIndirectCalls)
@ -296,7 +296,7 @@ var ignoreFunctions = {
"void mozilla::dom::JSStreamConsumer::~JSStreamConsumer() [[base_dtor]]": true,
};
function extraGCFunctions() {
function extraGCFunctions(readableNames) {
return ["ffi_call"].filter(f => f in readableNames);
}
@ -323,7 +323,7 @@ function isICU(name)
name.match(/u(prv_malloc|prv_realloc|prv_free|case_toFullLower)_\d+/)
}
function ignoreGCFunction(mangled)
function ignoreGCFunction(mangled, readableNames)
{
// Field calls will not be in readableNames
if (!(mangled in readableNames))

Просмотреть файл

@ -15,7 +15,9 @@ if (scriptArgs[0] == '--function' || scriptArgs[0] == '-f') {
}
var typeInfo_filename = scriptArgs[0] || "typeInfo.txt";
var callgraphOut_filename = scriptArgs[1] || "callgraph.txt";
var callgraphOut_filename = scriptArgs[1] || "rawcalls.txt";
var batch = (scriptArgs[2]|0) || 1;
var numBatches = (scriptArgs[3]|0) || 1;
var origOut = os.file.redirect(callgraphOut_filename);
@ -388,7 +390,10 @@ function process(functionName, functionBodies)
printOnce(`D ${functionId("(js-code)")} ${functionId(functionName)}`);
}
for (var nameIndex = minStream; nameIndex <= maxStream; nameIndex++) {
var start = batchStart(batch, numBatches, minStream, maxStream);
var end = batchLast(batch, numBatches, minStream, maxStream);
for (var nameIndex = start; nameIndex <= end; nameIndex++) {
var name = xdb.read_key(nameIndex);
var data = xdb.read_entry(name);
process(name.readString(), JSON.parse(data.readString()));

Просмотреть файл

@ -9,24 +9,43 @@ loadRelativeToScript('utility.js');
loadRelativeToScript('annotations.js');
loadRelativeToScript('loadCallgraph.js');
// Abort by throwing the command-line usage string.
function usage() {
    const message = "Usage: computeGCFunctions.js <rawcalls1.txt> <rawcalls2.txt>... --outputs <out:callgraph.txt> <out:gcFunctions.txt> <out:gcFunctions.lst> <out:gcEdges.txt> <out:limitedFunctions.lst>";
    throw message;
}
if (typeof scriptArgs[0] != 'string')
throw "Usage: computeGCFunctions.js <callgraph.txt> <out:gcFunctions.txt> <out:gcFunctions.lst> <out:gcEdges.txt> <out:limitedFunctions.lst>";
usage();
var start = "Time: " + new Date;
var callgraph_filename = scriptArgs[0];
var gcFunctions_filename = scriptArgs[1] || "gcFunctions.txt";
var gcFunctionsList_filename = scriptArgs[2] || "gcFunctions.lst";
var gcEdges_filename = scriptArgs[3] || "gcEdges.txt";
var rawcalls_filenames = [];
while (scriptArgs.length) {
const arg = scriptArgs.shift();
if (arg == '--outputs')
break;
rawcalls_filenames.push(arg);
}
if (scriptArgs.length == 0)
usage();
var callgraph_filename = scriptArgs[0] || "callgraph.txt";
var gcFunctions_filename = scriptArgs[1] || "gcFunctions.txt";
var gcFunctionsList_filename = scriptArgs[2] || "gcFunctions.lst";
var gcEdges_filename = scriptArgs[3] || "gcEdges.txt";
var limitedFunctionsList_filename = scriptArgs[4] || "limitedFunctions.lst";
var gcFunctions = loadCallgraph(callgraph_filename);
var {
gcFunctions,
functions,
calleesOf,
limitedFunctions
} = loadCallgraph(rawcalls_filenames);
printErr("Writing " + gcFunctions_filename);
redirect(gcFunctions_filename);
for (var name in gcFunctions) {
for (const readable of (readableNames[name] || [name])) {
for (let readable of (functions.readableName[name] || [name])) {
print("");
const fullname = (name == readable) ? name : name + "$" + readable;
print("GC Function: " + fullname);
@ -35,8 +54,8 @@ for (var name in gcFunctions) {
current = gcFunctions[current];
if (current === 'internal')
; // Hit the end
else if (current in readableNames)
print(" " + readableNames[current][0]);
else if (current in functions.readableName)
print(" " + functions.readableName[current][0]);
else
print(" " + current);
} while (current in gcFunctions);
@ -46,8 +65,8 @@ for (var name in gcFunctions) {
printErr("Writing " + gcFunctionsList_filename);
redirect(gcFunctionsList_filename);
for (var name in gcFunctions) {
if (name in readableNames) {
for (var readable of readableNames[name])
if (name in functions.readableName) {
for (var readable of functions.readableName[name])
print(name + "$" + readable);
} else {
print(name);
@ -75,3 +94,7 @@ for (var block in gcEdges) {
printErr("Writing " + limitedFunctionsList_filename);
redirect(limitedFunctionsList_filename);
print(JSON.stringify(limitedFunctions, null, 4));
printErr("Writing " + callgraph_filename);
redirect(callgraph_filename);
saveCallgraph(functions, calleesOf);

Просмотреть файл

@ -33,23 +33,16 @@ loadRelativeToScript('callgraph.js');
// consider the mangled name. And some of the names encoded in callgraph.txt
// are FieldCalls, not just function names.
var readableNames = {}; // map from mangled name => list of readable names
var limitedFunctions = {}; // set of mangled names (map from mangled name => [any,all intsets])
var gcEdges = {};
// "Map" from identifier to mangled name, or sometimes to a Class.Field name.
var functionNames = [""];
var mangledToId = {};
// Returns whether the function was added. (It will be refused if it was
// already there, or if attrs or annotations say it shouldn't be added.)
function addGCFunction(caller, reason, gcFunctions, functionAttrs)
function addGCFunction(caller, reason, gcFunctions, functionAttrs, functions)
{
if (functionAttrs[caller] && functionAttrs[caller][1] & ATTR_GC_SUPPRESSED)
return false;
if (ignoreGCFunction(functionNames[caller]))
if (ignoreGCFunction(functions.name[caller], functions.readableName))
return false;
if (!(caller in gcFunctions)) {
@ -74,8 +67,7 @@ function generate_callgraph(rawCallees) {
const callersOf = new Map();
const calleesOf = new Map();
for (const [caller_prop, callee_attrs] of Object.entries(rawCallees)) {
const caller = caller_prop|0;
for (const [caller, callee_attrs] of rawCallees) {
const ordered_callees = [];
// callee_attrs is a list of {callee,any,all} objects.
@ -115,8 +107,18 @@ function generate_callgraph(rawCallees) {
}
// Returns object mapping mangled => reason for GCing
function loadCallgraph(file)
function loadRawCallgraphFile(file)
{
const functions = {
// "Map" from identifier to mangled name, or sometimes to a Class.Field name.
name: [""],
// map from mangled name => list of readable names
readableName: {},
mangledToId: {}
};
const fieldCallAttrs = {};
const fieldCallCSU = new Map(); // map from full field name id => csu name
@ -126,7 +128,8 @@ function loadCallgraph(file)
const gcCalls = [];
const indirectCalls = [];
const rawCallees = {}; // map from mangled => list of tuples of {'callee':mangled, 'any':intset, 'all':intset}
// map from mangled => list of tuples of {'callee':mangled, 'any':intset, 'all':intset}
const rawCallees = new Map();
for (let line of readFileLines_gen(file)) {
line = line.replace(/\n/, "");
@ -134,18 +137,18 @@ function loadCallgraph(file)
let match;
if (match = line.charAt(0) == "#" && /^\#(\d+) (.*)/.exec(line)) {
const [ _, id, mangled ] = match;
assert(functionNames.length == id);
functionNames.push(mangled);
mangledToId[mangled] = id;
assert(functions.name.length == id);
functions.name.push(mangled);
functions.mangledToId[mangled] = id|0;
continue;
}
if (match = line.charAt(0) == "=" && /^= (\d+) (.*)/.exec(line)) {
const [ _, id, readable ] = match;
const mangled = functionNames[id];
if (mangled in readableNames)
readableNames[mangled].push(readable);
const mangled = functions.name[id];
if (mangled in functions.readableName)
functions.readableName[mangled].push(readable);
else
readableNames[mangled] = [ readable ];
functions.readableName[mangled] = [ readable ];
continue;
}
@ -165,7 +168,7 @@ function loadCallgraph(file)
if (match = tag == 'I' && /^I (\d+) VARIABLE ([^\,]*)/.exec(line)) {
const caller = match[1]|0;
const name = match[2];
if (indirectCallCannotGC(functionNames[caller], name))
if (indirectCallCannotGC(functions.name[caller], name))
attrs |= ATTR_GC_SUPPRESSED;
indirectCalls.push([caller, "IndirectCall: " + name, attrs]);
} else if (match = tag == 'F' && /^F (\d+) (\d+) CLASS (.*?) FIELD (.*)/.exec(line)) {
@ -173,23 +176,23 @@ function loadCallgraph(file)
const fullfield = match[2]|0;
const csu = match[3];
const fullfield_str = csu + "." + match[4];
assert(functionNames[fullfield] == fullfield_str);
assert(functions.name[fullfield] == fullfield_str);
if (attrs)
fieldCallAttrs[fullfield] = attrs;
addToKeyedList(rawCallees, caller, {callee:fullfield, any:attrs, all:attrs});
addToMappedList(rawCallees, caller, {callee:fullfield, any:attrs, all:attrs});
fieldCallCSU.set(fullfield, csu);
if (fieldCallCannotGC(csu, fullfield_str))
addToKeyedList(rawCallees, fullfield, {callee:ID.nogcfunc, any:0, all:0});
addToMappedList(rawCallees, fullfield, {callee:ID.nogcfunc, any:0, all:0});
else
addToKeyedList(rawCallees, fullfield, {callee:ID.anyfunc, any:0, all:0});
addToMappedList(rawCallees, fullfield, {callee:ID.anyfunc, any:0, all:0});
} else if (match = tag == 'V' && /^V (\d+) (\d+) CLASS (.*?) FIELD (.*)/.exec(line)) {
// V tag is no longer used, but we are still emitting it because it
// can be helpful to understand what's going on.
} else if (match = tag == 'D' && /^D (\d+) (\d+)/.exec(line)) {
const caller = match[1]|0;
const callee = match[2]|0;
addToKeyedList(rawCallees, caller, {callee, any:attrs, all:attrs});
addToMappedList(rawCallees, caller, {callee, any:attrs, all:attrs});
} else if (match = tag == 'R' && /^R (\d+) (\d+)/.exec(line)) {
assert(false, "R tag is no longer used");
} else if (match = tag == 'T' && /^T (\d+) (.*)/.exec(line)) {
@ -202,17 +205,102 @@ function loadCallgraph(file)
}
}
assert(ID.jscode == mangledToId["(js-code)"]);
assert(ID.anyfunc == mangledToId["(any-function)"]);
assert(ID.nogcfunc == mangledToId["(nogc-function)"]);
assert(ID.gc == mangledToId["(GC)"]);
printErr("Loaded " + file);
addToKeyedList(rawCallees, mangledToId["(any-function)"], {callee:ID.gc, any:0, all:0});
return {
fieldCallAttrs,
fieldCallCSU,
gcCalls,
indirectCalls,
rawCallees,
functions
};
}
// Take a set of rawcalls filenames (as in, the raw callgraph data output by
// computeCallgraph.js) and combine them into a global callgraph, renumbering
// the IDs as needed.
//
// The data loaded from the first file becomes the result object and is
// extended in place with the (renumbered) contents of every later file.
// Returns undefined when `filenames` is empty.
function mergeRawCallgraphs(filenames) {
    let merged;

    for (const filename of filenames) {
        const incoming = loadRawCallgraphFile(filename);
        if (!merged) {
            // First file: adopt its numbering as the global numbering.
            merged = incoming;
            continue;
        }

        const known = merged.functions;
        const fresh = incoming.functions;

        // Compute the ID mapping. Incoming functions that already have an ID
        // will be mapped to that ID; new ones will allocate a fresh ID.
        // (Index 0 is skipped; ids start at 1.)
        const remap = new Array(fresh.name.length);
        for (let i = 1; i < fresh.name.length; i++) {
            const mangled = fresh.name[i];
            const existing = known.mangledToId[mangled];
            if (existing) {
                // Already known: readable names from this file are not merged.
                remap[i] = existing;
                continue;
            }

            const newid = known.name.length;
            known.mangledToId[mangled] = newid;
            known.name.push(mangled);
            remap[i] = newid;

            assert(!(mangled in known.readableName), mangled + " readable name is already found");
            const readables = fresh.readableName[mangled];
            if (readables !== undefined)
                known.readableName[mangled] = readables;
        }

        // Copy every per-file table across, translating ids through remap.
        for (const fullfield of Object.keys(incoming.fieldCallAttrs))
            merged.fieldCallAttrs[remap[fullfield]] = incoming.fieldCallAttrs[fullfield];

        for (const [fullfield, csu] of incoming.fieldCallCSU)
            merged.fieldCallCSU.set(remap[fullfield], csu);

        for (const call of incoming.gcCalls)
            merged.gcCalls.push(remap[call]);

        for (const [caller, name, attrs] of incoming.indirectCalls)
            merged.indirectCalls.push([remap[caller], name, attrs]);

        for (const [caller, callees] of incoming.rawCallees) {
            const newCaller = remap[caller]|0;
            for (const {callee, any, all} of callees)
                addToMappedList(merged.rawCallees, newCaller, {callee:remap[callee], any, all});
        }
    }

    return merged;
}
function loadCallgraph(files)
{
const {
fieldCallAttrs,
fieldCallCSU,
gcCalls,
indirectCalls,
rawCallees,
functions
} = mergeRawCallgraphs(files);
assert(ID.jscode == functions.mangledToId["(js-code)"]);
assert(ID.anyfunc == functions.mangledToId["(any-function)"]);
assert(ID.nogcfunc == functions.mangledToId["(nogc-function)"]);
assert(ID.gc == functions.mangledToId["(GC)"]);
addToMappedList(rawCallees, functions.mangledToId["(any-function)"], {callee:ID.gc, any:0, all:0});
// Compute functionAttrs: it should contain the set of functions that
// are *always* called within some sort of limited context (eg GC
// suppression).
// set of mangled names (map from mangled name => [any,all])
const functionAttrs = {};
// Initialize to field calls with attrs set.
for (var [name, attrs] of Object.entries(fieldCallAttrs))
functionAttrs[name] = [attrs, attrs];
@ -223,18 +311,19 @@ function loadCallgraph(file)
// Add in any extra functions at the end. (If we did this early, it would
// mess up the id <-> name correspondence. Also, we need to know if the
// functions even exist in the first place.)
for (var func of extraGCFunctions()) {
addGCFunction(mangledToId[func], "annotation", gcFunctions, functionAttrs);
for (var func of extraGCFunctions(functions.readableName)) {
addGCFunction(functions.mangledToId[func], "annotation", gcFunctions, functionAttrs, functions);
}
for (const func of gcCalls)
addToKeyedList(rawCallees, func, {callee:ID.gc, any:0, all:0});
addToMappedList(rawCallees, func, {callee:ID.gc, any:0, all:0});
for (const [caller, indirect, attrs] of indirectCalls) {
const id = functionNames.length;
functionNames.push(indirect);
mangledToId[indirect] = id;
addToKeyedList(rawCallees, caller, {callee:id, any:attrs, all:attrs});
addToKeyedList(rawCallees, id, {callee:ID.anyfunc, any:0, all:0});
const id = functions.name.length;
functions.name.push(indirect);
functions.mangledToId[indirect] = id;
addToMappedList(rawCallees, caller, {callee:id, any:attrs, all:attrs});
addToMappedList(rawCallees, id, {callee:ID.anyfunc, any:0, all:0});
}
// Callers have a list of callees, with duplicates (if the same function is
@ -271,7 +360,7 @@ function loadCallgraph(file)
//
// Simple example: in the JS shell build, moz_xstrdup calls itself, but
// there are no calls to it from within js/src.
const recursive_roots = gather_recursive_roots(functionAttrs, calleesOf, callersOf);
const recursive_roots = gather_recursive_roots(functionAttrs, calleesOf, callersOf, functions);
// And do a final traversal starting with the recursive roots.
propagate_attrs(recursive_roots, functionAttrs, calleesOf);
@ -302,7 +391,7 @@ function loadCallgraph(file)
// Include all field calls (but not virtual method calls).
for (const [name, csuName] of fieldCallCSU) {
const fullFieldName = functionNames[name];
const fullFieldName = functions.name[name];
if (!fieldCallCannotGC(csuName, fullFieldName)) {
gcFunctions[name] = 'arbitrary function pointer ' + fullFieldName;
worklist.push(name);
@ -318,7 +407,7 @@ function loadCallgraph(file)
continue;
for (const [caller, {any, all}] of callersOf.get(name)) {
if (!(all & ATTR_GC_SUPPRESSED)) {
if (addGCFunction(caller, name, gcFunctions, functionAttrs))
if (addGCFunction(caller, name, gcFunctions, functionAttrs, functions))
worklist.push(caller);
}
}
@ -327,22 +416,58 @@ function loadCallgraph(file)
// Convert functionAttrs to limitedFunctions (using mangled names instead
// of ids.)
// set of mangled names (map from mangled name => {attributes, recursive_root:bool})
var limitedFunctions = {};
for (const [id, [any, all]] of Object.entries(functionAttrs))
limitedFunctions[functionNames[id]] = { attributes: all };
limitedFunctions[functions.name[id]] = { attributes: all };
for (const [id, limits, label] of recursive_roots) {
const name = functionNames[id];
const name = functions.name[id];
const s = limitedFunctions[name] || (limitedFunctions[name] = {});
s.recursive_root = true;
}
// Remap ids to mangled names and return the gcFunctions table.
// Remap ids to mangled names.
const namedGCFunctions = {};
for (const [caller, reason] of Object.entries(gcFunctions)) {
namedGCFunctions[functionNames[caller]] = functionNames[reason] || reason;
namedGCFunctions[functions.name[caller]] = functions.name[reason] || reason;
}
return namedGCFunctions;
return {
gcFunctions: namedGCFunctions,
functions,
calleesOf,
callersOf,
limitedFunctions
};
}
// Print the merged callgraph: one "#<id> <name>" line per function id, a
// "= <id> <readable>" line for each readable name that differs from the
// name, then one "D [<all>:<any> ]<caller> <callee>" line per call edge.
function saveCallgraph(functions, calleesOf) {
    // Write out all the ids and their readable names. Slot 0 is not an id.
    const names = functions.name;
    for (let id = 1; id < names.length; id++) {
        const name = names[id];
        print(`#${id} ${name}`);
        const readables = functions.readableName[name] || [];
        for (const readable of readables) {
            if (readable == name)
                continue;
            print(`= ${id} ${readable}`);
        }
    }

    // Omit field calls for now; let them appear as if they were functions.

    // Attribute prefix for an edge; empty when neither attribute set is set.
    function attrstring(range) {
        if (range.any || range.all)
            return `${range.all}:${range.any} `;
        return '';
    }

    for (const [caller, callees] of calleesOf) {
        for (const [callee, attrs] of callees)
            print(`D ${attrstring(attrs)}${caller} ${callee}`);
    }

    // Omit tags for now. This really should preserve all tags. The "GC Call"
    // tag will already be represented in the graph by having an edge to the
    // "(GC)" node.
}
// Return a worklist of functions with no callers, and also initialize
@ -388,7 +513,7 @@ function propagate_attrs(roots, functionAttrs, calleesOf) {
// Mutually-recursive roots and their descendants will not have been visited,
// and will still be set to [0, ATTRS_UNVISITED]. Scan through and gather them.
function gather_recursive_roots(functionAttrs, calleesOf, callersOf) {
function gather_recursive_roots(functionAttrs, calleesOf, callersOf, functions) {
const roots = [];
// Pick any node. Mark everything reachable by adding to a 'seen' set. At
@ -418,7 +543,6 @@ function gather_recursive_roots(functionAttrs, calleesOf, callersOf) {
const f = work.pop();
if (!calleesOf.has(f)) continue;
for (const callee of calleesOf.get(f).keys()) {
if (!functionAttrs[callee]) debugger;
if (!seen.has(callee) &&
callee != func &&
functionAttrs[callee][1] == ATTRS_UNVISITED)

Просмотреть файл

@ -267,8 +267,6 @@ def gather_hazard_data(command_context, **kwargs):
sys.executable,
os.path.join(script_dir(command_context), "analyze.py"),
"dbs",
"--upto",
"dbs",
"-v",
"--buildcommand=" + buildscript,
]
@ -378,6 +376,7 @@ def analyze(command_context, application, shell_objdir, work_dir, extra):
args += extra
else:
args += [
"--first",
"gcTypes",
"-v",
]

Просмотреть файл

@ -85,7 +85,7 @@ class Test(object):
)
return json.loads(output)
def run_analysis_script(self, phase, upto=None):
def run_analysis_script(self, startPhase, upto=None):
open("defaults.py", "w").write(
"""\
analysis_scriptdir = '{scriptdir}'
@ -98,10 +98,10 @@ sixgill_bin = '{bindir}'
sys.executable,
os.path.join(scriptdir, "analyze.py"),
"-v" if self.verbose else "-q",
phase,
]
cmd += ["--first", startPhase]
if upto:
cmd += ["--upto", upto]
cmd += ["--last", upto]
cmd.append("--source=%s" % self.indir)
cmd.append("--objdir=%s" % self.outdir)
cmd.append("--js=%s" % self.cfg.js)

Просмотреть файл

@ -289,7 +289,56 @@ function addToKeyedList(collection, key, entry)
return collection[key];
}
// Append `entry` to the list stored under `key` in the Map `map`, creating
// the list first if the key is absent. Returns that list.
function addToMappedList(map, key, entry)
{
    let list;
    if (map.has(key)) {
        list = map.get(key);
    } else {
        list = [];
        map.set(key, list);
    }
    list.push(entry);
    return list;
}
// Read `filename` with os.file.readFile and return its contents parsed as
// JSON.
function loadTypeInfo(filename)
{
    const text = os.file.readFile(filename);
    return JSON.parse(text);
}
// Given the range `first` .. `last`, break it down into `count` batches and
// return the start of the (1-based) `num` batch.
function batchStart(num, count, first, last) {
    const span = last - first + 1;
    const offset = Math.floor((num - 1) / count * span);
    return offset + first;
}
// Given the range `first` .. `last` broken down into `count` batches, return
// the last value in the (1-based) `num` batch.
function batchLast(num, count, first, last) {
    const span = last - first + 1;
    // Start of the following batch; this batch ends just before it.
    const nextStart = Math.floor(num / count * span) + first;
    return nextStart - 1;
}
// Debugging tool. Builds a Proxy handler that executes a `debugger;`
// statement whenever the property `traced_prop` is set. If `check` has a
// 'value' entry, only sets of a (loosely) equal value trigger it.
function PropertyTracer(traced_prop, check) {
    const handler = {
        // Does setting `prop` to `value` satisfy the trace condition?
        matches(prop, value) {
            if (prop != traced_prop)
                return false;
            return 'value' in check ? value == check.value : true;
        },

        // Also called when defining a property.
        set(obj, prop, value) {
            if (this.matches(prop, value))
                debugger;
            // Forward every argument so the set behaves as it normally would.
            return Reflect.set(...arguments);
        },
    };
    return handler;
}
// Usage: var myobj = traced({}, 'name', {value: 'Bob'})
//
// This will execute a `debugger;` statement when myobj['name'] is defined or
// set to 'Bob'.
//
// Returns a Proxy around `obj`; use the returned object in place of `obj`.
function traced(obj, traced_prop, check) {
    const handler = PropertyTracer(traced_prop, check);
    return new Proxy(obj, handler);
}