Bug 1582898 - Split up the callgraph generation step r=jonco

Differential Revision: https://phabricator.services.mozilla.com/D46688
2021-10-18 20:34:39 +00:00 · 2021-10-18 20:34:39 +00:00 · 01d04a3348
--- a/js/src/devtools/rootAnalysis/analyze.py
+++ b/js/src/devtools/rootAnalysis/analyze.py
@ -14,23 +14,25 @@ import argparse
 import os
 import subprocess
 import sys
-import re

 try:
    from shlex import quote
 except ImportError:
    from pipes import quote

-# Python 2/3 version independence polyfills

-anystring_t = str if sys.version_info[0] > 2 else basestring
+def execfile(thefile, globals):
+    """Execute the Python source file `thefile` with `globals` as its namespace.
+
+    Names defined by the executed file end up in the `globals` dict. The
+    file name is passed to compile() so tracebacks point at `thefile`.
+    """
+    # Read through a context manager so the file handle is closed right away
+    # rather than whenever the garbage collector gets around to it.
+    with open(thefile) as source_file:
+        source = source_file.read()
+    exec(compile(source, filename=thefile, mode="exec"), globals)

-try:
-    execfile
-except Exception:

-    def execfile(thefile, globals):
-        exec(compile(open(thefile).read(), filename=thefile, mode="exec"), globals)
+class Output(str):
+    """A str subclass used purely as a label: this string is an output."""
+
+
+class MultiInput(str):
+    """A str subclass used purely as a label: this string is a pattern for
+    multiple inputs."""


 def env(config):
@ -45,19 +47,28 @@ def env(config):


 def fill(command, config):
-    try:
-        return tuple(s % config for s in command)
-    except Exception:
-        print("Substitution failed:")
-        problems = []
-        for fragment in command:
-            try:
-                fragment % config
-            except Exception:
-                problems.append(fragment)
-        raise Exception(
-            "\n".join(["Substitution failed:"] + ["  %s" % s for s in problems])
-        )
+    """Substitute `config` values into every fragment of `command`.
+
+    Each fragment is expanded with str.format(**config). Fragments that are
+    Output instances stay Output instances. A MultiInput fragment is expanded
+    a second time, once per job, with `i` running from 1 to config["jobs"]
+    and `n` set to config["jobs"], so it contributes that many entries.
+
+    Returns a tuple of the filled-in fragments. Raises Exception listing
+    every fragment that referenced a key missing from `config`.
+    """
+    filled = []
+    problems = []
+    for s in command:
+        try:
+            rep = s.format(**config)
+        except KeyError:
+            # Keep scanning so that all bad fragments are reported together
+            # instead of only the first one.
+            print("Substitution failed: %s" % s)
+            problems.append(s)
+            continue
+
+        if problems:
+            # Already failing: keep validating fragments but skip expansion.
+            continue
+
+        if isinstance(s, Output):
+            filled.append(Output(rep))
+        elif isinstance(s, MultiInput):
+            N = int(config["jobs"])
+            filled.extend(rep.format(i=i, n=N) for i in range(1, N + 1))
+        else:
+            filled.append(rep)
+
+    if problems:
+        raise Exception(
+            "\n".join(["substitution failure:"] + ["  %s" % s for s in problems])
+        )
+
+    return tuple(filled)


 def print_command(command, outfile=None, env=None):
@ -85,173 +96,216 @@ def print_command(command, outfile=None, env=None):

    print(output)

+# Table of analysis jobs, keyed by step name. Keys used in each entry:
+#  - 'command': list of command fragments. '{key}' placeholders are filled in
+#    from the config by fill(). Output(...) fragments mark where an output
+#    file name goes; MultiInput(...) fragments expand to one argument per job.
+#  - 'outputs': final file names for the command's Output fragments, in the
+#    same order as they appear in 'command'.
+#  - 'redirect-output': file name that receives the command's stdout.
+#  - 'multi-output': if true, run_job() runs the command config["jobs"] times
+#    with '{i}' set to the 1-based job index and '{n}' to the job count.
+JOBS = {
+    'dbs': {
+        'command': [
+            '{analysis_scriptdir}/run_complete',
+            '--foreground',
+            '--no-logs',
+            '--build-root={objdir}',
+            '--wrap-dir={sixgill}/scripts/wrap_gcc',
+            '--work-dir=work',
+            '-b', '{sixgill_bin}',
+            '--buildcommand={buildcommand}',
+            '.'
+        ],
+        'outputs': []
+    },

-def generate_hazards(config, outfilename):
-    jobs = []
-    for i in range(int(config["jobs"])):
-        command = fill(
-            (
-                "%(js)s",
-                "%(analysis_scriptdir)s/analyzeRoots.js",
-                "%(gcFunctions_list)s",
-                "%(gcEdges)s",
-                "%(limitedFunctions_list)s",
-                "%(gcTypes)s",
-                "%(typeInfo)s",
-                str(i + 1),
-                "%(jobs)s",
-                "tmp.%s" % (i + 1,),
-            ),
-            config,
-        )
-        outfile = "rootingHazards.%s" % (i + 1,)
-        output = open(outfile, "w")
-        if config["verbose"]:
-            print_command(command, outfile=outfile, env=env(config))
-        jobs.append((command, Popen(command, stdout=output, env=env(config))))
+    'list-dbs': {
+        'command': ['ls', '-l']
+    },
+
+    'rawcalls': {
+        'command': [
+            '{js}',
+            '{analysis_scriptdir}/computeCallgraph.js',
+            '{typeInfo}',
+            Output('rawcalls'),
+            '{i}', '{n}'
+        ],
+        'multi-output': True,
+        'outputs': ['rawcalls.{i}.of.{n}']
+    },
+
+    'gcFunctions': {
+        'command': [
+            '{js}', '{analysis_scriptdir}/computeGCFunctions.js', MultiInput('{rawcalls}'),
+            '--outputs',
+            Output('callgraph'),
+            Output('gcFunctions'),
+            Output('gcFunctions_list'),
+            Output('gcEdges'),
+            Output('limitedFunctions_list')
+        ],
+        'outputs': [
+            'callgraph.txt',
+            'gcFunctions.txt',
+            'gcFunctions.lst',
+            'gcEdges.txt',
+            'limitedFunctions.lst'
+        ],
+    },
+
+    'gcTypes': {
+        'command': [
+            '{js}', '{analysis_scriptdir}/computeGCTypes.js',
+            Output('gcTypes'), Output('typeInfo')
+        ],
+        'outputs': ['gcTypes.txt', 'typeInfo.txt']
+    },
+
+    'allFunctions': {
+        'command': ['{sixgill_bin}/xdbkeys', 'src_body.xdb'],
+        'redirect-output': 'allFunctions.txt'
+    },
+
+    'hazards': {
+        'command': [
+            '{js}',
+            '{analysis_scriptdir}/analyzeRoots.js',
+            '{gcFunctions_list}',
+            '{gcEdges}',
+            '{limitedFunctions_list}',
+            '{gcTypes}',
+            '{typeInfo}',
+            '{i}', '{n}',
+            'tmp.{i}.of.{n}'
+        ],
+        'multi-output': True,
+        'redirect-output': 'rootingHazards.{i}.of.{n}'
+    },
+
+    'gather-hazards': {
+        'command': ['cat', MultiInput('{hazards}')],
+        'redirect-output': 'rootingHazards.txt'
+    },
+
+    'explain': {
+        'command': [
+            sys.executable,
+            '{analysis_scriptdir}/explain.py',
+            '{gather-hazards}',
+            '{gcFunctions}',
+            Output('explained_hazards'), Output('unnecessary'), Output('refs')
+        ],
+        'outputs': ['hazards.txt', 'unnecessary.txt', 'refs.txt']
+    },
+
+    'heapwrites': {
+        'command': ['{js}', '{analysis_scriptdir}/analyzeHeapWrites.js'],
+        'redirect-output': 'heapWriteHazards.txt'
+    }
+}
+
+
+# Yield one (i, j, item) tuple per Output fragment found in a command:
+#  - i counts the tuples yielded so far, starting at 0 (as enumerate() would)
+#  - j is the position of the fragment within the command list
+#  - item is the fragment itself, i.e. command[j]
+def out_indexes(command):
+    output_slots = (
+        (pos, frag) for (pos, frag) in enumerate(command) if isinstance(frag, Output)
+    )
+    for seq, (pos, frag) in enumerate(output_slots):
+        yield (seq, pos, frag)
+
+
+def run_job(name, config):
+    """Run the job called `name` from JOBS and wait for it to finish.
+
+    A job with a "function" entry is delegated to that callable. Otherwise
+    the job's command is filled in from `config` and spawned once, or
+    config["jobs"] times for a "multi-output" job. `config` is mutated:
+    config["n"] is set to the number of processes and config["i"] to the
+    1-based index of the process being spawned.
+
+    As each child exits, its stdout redirect (if any) is closed and, when the
+    child succeeded, its temporary output files are renamed to their final
+    names.
+
+    Raises Exception if any child exits with a nonzero status, and re-raises
+    OSError if renaming an output file fails.
+    """
+    job = JOBS[name]
+    outs = job.get("outputs") or job.get("redirect-output")
+    print("Running " + name + " to generate " + str(outs))
+    if "function" in job:
+        job["function"](config, job["redirect-output"])
+        return
+
+    N = int(config["jobs"]) if job.get("multi-output") else 1
+    config["n"] = N
+    jobs = {}
+    for i in range(1, N + 1):
+        config["i"] = i
+        cmd = fill(job["command"], config)
+        info = spawn_command(cmd, job, name, config)
+        jobs[info["proc"].pid] = info

    final_status = 0
    while jobs:
        pid, status = os.wait()
-        jobs = [job for job in jobs if job[1].pid != pid]
        final_status = final_status or status
+        info = jobs[pid]
+        del jobs[pid]
+        if "redirect" in info:
+            info["redirect"].close()

-    if final_status:
-        raise subprocess.CalledProcessError(final_status, "analyzeRoots.js")
-
-    with open(outfilename, "w") as output:
-        command = ["cat"] + [
-            "rootingHazards.%s" % (i + 1,) for i in range(int(config["jobs"]))
-        ]
-        if config["verbose"]:
-            print_command(command, outfile=outfilename)
-        subprocess.call(command, stdout=output)
-
-
-JOBS = {
-    "dbs": (
-        (
-            "%(analysis_scriptdir)s/run_complete",
-            "--foreground",
-            "--no-logs",
-            "--build-root=%(objdir)s",
-            "--wrap-dir=%(sixgill)s/scripts/wrap_gcc",
-            "--work-dir=work",
-            "-b",
-            "%(sixgill_bin)s",
-            "--buildcommand=%(buildcommand)s",
-            ".",
-        ),
-        (),
-    ),
-    "list-dbs": (("ls", "-l"), ()),
-    "callgraph": (
-        (
-            "%(js)s",
-            "%(analysis_scriptdir)s/computeCallgraph.js",
-            "%(typeInfo)s",
-            "[callgraph]",
-        ),
-        ("callgraph.txt",),
-    ),
-    "gcFunctions": (
-        (
-            "%(js)s",
-            "%(analysis_scriptdir)s/computeGCFunctions.js",
-            "%(callgraph)s",
-            "[gcFunctions]",
-            "[gcFunctions_list]",
-            "[gcEdges]",
-            "[limitedFunctions_list]",
-        ),
-        ("gcFunctions.txt", "gcFunctions.lst", "gcEdges.txt", "limitedFunctions.lst"),
-    ),
-    "gcTypes": (
-        (
-            "%(js)s",
-            "%(analysis_scriptdir)s/computeGCTypes.js",
-            "[gcTypes]",
-            "[typeInfo]",
-        ),
-        ("gcTypes.txt", "typeInfo.txt"),
-    ),
-    "allFunctions": (
-        (
-            "%(sixgill_bin)s/xdbkeys",
-            "src_body.xdb",
-        ),
-        "allFunctions.txt",
-    ),
-    "hazards": (generate_hazards, "rootingHazards.txt"),
-    "explain": (
-        (
-            sys.executable,
-            "%(analysis_scriptdir)s/explain.py",
-            "%(hazards)s",
-            "%(gcFunctions)s",
-            "[explained_hazards]",
-            "[unnecessary]",
-            "[refs]",
-        ),
-        ("hazards.txt", "unnecessary.txt", "refs.txt"),
-    ),
-    "heapwrites": (
-        ("%(js)s", "%(analysis_scriptdir)s/analyzeHeapWrites.js"),
-        "heapWriteHazards.txt",
-    ),
-}
-
-
-def out_indexes(command):
-    for i in range(len(command)):
-        m = re.match(r"^\[(.*)\]$", command[i])
-        if m:
-            yield (i, m.group(1))
-
-
-def run_job(name, config):
-    cmdspec, outfiles = JOBS[name]
-    print("Running " + name + " to generate " + str(outfiles))
-    if hasattr(cmdspec, "__call__"):
-        cmdspec(config, outfiles)
-    else:
-        temp_map = {}
-        cmdspec = fill(cmdspec, config)
-        if isinstance(outfiles, anystring_t):
-            stdout_filename = "%s.tmp" % name
-            temp_map[stdout_filename] = outfiles
-            if config["verbose"]:
-                print_command(cmdspec, outfile=outfiles, env=env(config))
-        else:
-            stdout_filename = None
-            pc = list(cmdspec)
-            outfile = 0
-            for (i, name) in out_indexes(cmdspec):
-                pc[i] = outfiles[outfile]
-                outfile += 1
-            if config["verbose"]:
-                print_command(pc, env=env(config))
-
-        command = list(cmdspec)
-        outfile = 0
-        for (i, name) in out_indexes(cmdspec):
-            command[i] = "%s.tmp" % name
-            temp_map[command[i]] = outfiles[outfile]
-            outfile += 1
-
-        sys.stdout.flush()
-        if stdout_filename is None:
-            subprocess.check_call(command, env=env(config))
-        else:
-            with open(stdout_filename, "w") as output:
-                subprocess.check_call(command, stdout=output, env=env(config))
-        for (temp, final) in temp_map.items():
+        # A failed command may have left missing or partial outputs. Keep
+        # them under their temporary names so they are never mistaken for
+        # real results, and so a missing temp file cannot raise OSError and
+        # hide the actual failure reported below.
+        if status != 0:
+            continue
+
+        # Rename the temporary files to their final names.
+        for (temp, final) in info["rename_map"].items():
            try:
+                if config["verbose"]:
+                    print("Renaming %s -> %s" % (temp, final))
                os.rename(temp, final)
            except OSError:
                print("Error renaming %s -> %s" % (temp, final))
                raise

+    if final_status != 0:
+        raise Exception("job {} returned status {}".format(name, final_status))
+
+
+def spawn_command(cmdspec, job, name, config):
+    """Start one process for `job` and return bookkeeping info about it.
+
+    cmdspec is the already filled-in command (see fill()). The process
+    writes to temporary files; the returned dict records how to rename them.
+
+    Returns a dict with:
+      - "proc": the Popen object for the spawned process
+      - "rename_map": mapping of temporary file name -> final file name
+      - "redirect": the open stdout file, present only for jobs that have a
+        "redirect-output" entry (the caller is responsible for closing it)
+    """
+    rename_map = {}
+
+    if "redirect-output" in job:
+        stdout_filename = "{}.tmp{}".format(name, config.get("i", ""))
+        final_outfile = job["redirect-output"].format(**config)
+        rename_map[stdout_filename] = final_outfile
+        command = cmdspec
+        if config["verbose"]:
+            print_command(cmdspec, outfile=final_outfile, env=env(config))
+    else:
+        # Some jobs (e.g. 'list-dbs') declare neither outputs nor a
+        # redirect; treat a missing "outputs" entry as "no output files".
+        outfiles = fill(job.get("outputs", []), config)
+        stdout_filename = None
+
+        # To print the supposedly-executed command, replace the Outputs in the
+        # command with final output file names. (The actual command will be
+        # using temporary files that get renamed at the end.)
+        if config["verbose"]:
+            pc = list(cmdspec)
+            for (i, j, _fragment) in out_indexes(cmdspec):
+                pc[j] = outfiles[i]
+            print_command(pc, env=env(config))
+
+        # Replace the Outputs with temporary filenames, and record a mapping
+        # from those temp names to their actual final names that will be used
+        # if the command succeeds.
+        command = list(cmdspec)
+        for (i, j, fragment) in out_indexes(cmdspec):
+            command[j] = "{}.tmp{}".format(fragment, config.get("i", ""))
+            rename_map[command[j]] = outfiles[i]
+
+    sys.stdout.flush()
+    info = {"rename_map": rename_map}
+    if stdout_filename:
+        redirect = open(stdout_filename, "w")
+        try:
+            info["proc"] = Popen(command, stdout=redirect, env=env(config))
+        except Exception:
+            # Do not leak the open file if the process could not be started.
+            redirect.close()
+            raise
+        info["redirect"] = redirect
+    else:
+        info["proc"] = Popen(command, env=env(config))
+
+    if config["verbose"]:
+        print("Spawned process {}".format(info["proc"].pid))
+
+    return info
+
+
+# Default to conservatively assuming 4GB/job.
+def max_parallel_jobs(job_size=4 * 2 ** 30):
+    """Return the max number of parallel jobs we can run without overfilling
+    memory, assuming heavyweight jobs.
+
+    job_size is the assumed memory use of one job, in bytes. The result is
+    the smaller of the core-based limit (`nproc --ignore=1`) and the
+    memory-based limit, and is never less than 1.
+    """
+    from_cores = int(subprocess.check_output(["nproc", "--ignore=1"]).strip())
+    mem_bytes = os.sysconf("SC_PAGE_SIZE") * os.sysconf("SC_PHYS_PAGES")
+    from_mem = round(mem_bytes / job_size)
+    # round() gives 0 when physical memory is under half of job_size. Zero
+    # jobs would make multi-output steps spawn nothing and would produce
+    # "make -j0" as the default build command, so always allow one job.
+    return max(1, min(from_cores, from_mem))
+

 config = {"analysis_scriptdir": os.path.dirname(__file__)}

@ -264,7 +318,7 @@ parser = argparse.ArgumentParser(
    description="Statically analyze build tree for rooting hazards."
 )
 parser.add_argument(
-    "step", metavar="STEP", type=str, nargs="?", help="run starting from this step"
+    "step", metavar="STEP", type=str, nargs="?", help="run only step STEP"
 )
 parser.add_argument(
    "--source", metavar="SOURCE", type=str, nargs="?", help="source code to analyze"
@ -284,7 +338,14 @@ parser.add_argument(
    help="full path to ctypes-capable JS shell",
 )
 parser.add_argument(
-    "--upto", metavar="UPTO", type=str, nargs="?", help="last step to execute"
+    "--first",
+    metavar="STEP",
+    type=str,
+    nargs="?",
+    help="execute all jobs starting with STEP",
+)
+parser.add_argument(
+    "--last", metavar="STEP", type=str, nargs="?", help="stop at step STEP"
 )
 parser.add_argument(
    "--jobs",
@ -350,14 +411,14 @@ if args.tag and not args.buildcommand:
 if args.jobs is not None:
    data["jobs"] = args.jobs
 if not data.get("jobs"):
-    data["jobs"] = int(subprocess.check_output(["nproc", "--ignore=1"]).strip())
+    data["jobs"] = max_parallel_jobs()

 if args.buildcommand:
    data["buildcommand"] = args.buildcommand
 elif "BUILD" in os.environ:
    data["buildcommand"] = os.environ["BUILD"]
 else:
-    data["buildcommand"] = "make -j4 -s"
+    data["buildcommand"] = "make -j{} -s".format(data["jobs"])

 if "ANALYZED_OBJDIR" in os.environ:
    data["objdir"] = os.environ["ANALYZED_OBJDIR"]
@ -370,45 +431,56 @@ if "SOURCE" in os.environ:
 steps = [
    "dbs",
    "gcTypes",
-    "callgraph",
+    "rawcalls",
    "gcFunctions",
    "allFunctions",
    "hazards",
+    "gather-hazards",
    "explain",
    "heapwrites",
 ]

 if args.list:
    for step in steps:
-        command, outfilename = JOBS[step]
-        if outfilename:
-            print("%s -> %s" % (step, outfilename))
+        job = JOBS[step]
+        outfiles = job.get("outputs") or job.get("redirect-output")
+        if outfiles:
+            print(
+                "%s\n    ->%s %s"
+                % (step, "*" if job.get("multi-output") else "", outfiles)
+            )
        else:
            print(step)
    sys.exit(0)

 for step in steps:
-    command, outfiles = JOBS[step]
-    if isinstance(outfiles, anystring_t):
-        data[step] = outfiles
-    else:
-        outfile = 0
-        for (i, name) in out_indexes(command):
-            data[name] = outfiles[outfile]
-            outfile += 1
+    job = JOBS[step]
+    if "redirect-output" in job:
+        data[step] = job["redirect-output"]
+    elif "outputs" in job and "command" in job:
+        outfiles = job["outputs"]
+        for (i, j, name) in out_indexes(job["command"]):
+            data[name] = outfiles[i]
+        num_outputs = len(list(out_indexes(job["command"])))
        assert (
-            len(outfiles) == outfile
-        ), "step '%s': mismatched number of output files (%d) and params (%d)" % (
+            len(outfiles) == num_outputs
+        ), 'step "%s": mismatched number of output files (%d) and params (%d)' % (
            step,
-            outfile,
+            num_outputs,
            len(outfiles),
        )  # NOQA: E501

 if args.step:
-    steps = steps[steps.index(args.step) :]
-
-if args.upto:
-    steps = steps[: steps.index(args.upto) + 1]
+    if args.first or args.last:
+        raise Exception(
+            "--first and --last cannot be used when a step argument is given"
+        )
+    steps = [args.step]
+else:
+    if args.first:
+        steps = steps[steps.index(args.first) :]
+    if args.last:
+        steps = steps[: steps.index(args.last) + 1]

 for step in steps:
    run_job(step, data)
--- a/js/src/devtools/rootAnalysis/analyzeRoots.js
+++ b/js/src/devtools/rootAnalysis/analyzeRoots.js
@ -1114,10 +1114,8 @@ xdb.open("src_body.xdb");
 var minStream = xdb.min_data_stream()|0;
 var maxStream = xdb.max_data_stream()|0;

-var N = (maxStream - minStream) + 1;
-var start = Math.floor((batch - 1) / numBatches * N) + minStream;
-var start_next = Math.floor(batch / numBatches * N) + minStream;
-var end = start_next - 1;
+var start = batchStart(batch, numBatches, minStream, maxStream);
+var end = batchLast(batch, numBatches, minStream, maxStream);

 function process(name, json) {
    functionName = name;
--- a/js/src/devtools/rootAnalysis/annotations.js
+++ b/js/src/devtools/rootAnalysis/annotations.js
@ -40,8 +40,8 @@ function indirectCallCannotGC(fullCaller, fullVariable)

    // This is usually a simple variable name, but sometimes a full name gets
    // passed through. And sometimes that name is truncated. Examples:
-    //   _ZL13gAbortHandler|mozalloc_oom.cpp:void (* gAbortHandler)(size_t)
-    //   _ZL14pMutexUnlockFn|umutex.cpp:void (* pMutexUnlockFn)(const void*
+    //   _ZL13gAbortHandler$mozalloc_oom.cpp:void (* gAbortHandler)(size_t)
+    //   _ZL14pMutexUnlockFn$umutex.cpp:void (* pMutexUnlockFn)(const void*
    var name = readable(fullVariable);

    if (name in ignoreIndirectCalls)
@ -296,7 +296,7 @@ var ignoreFunctions = {
    "void mozilla::dom::JSStreamConsumer::~JSStreamConsumer() [[base_dtor]]": true,
 };

-function extraGCFunctions() {
+// Return the hard-coded list of extra GC function names (currently just
+// "ffi_call"), keeping only those that appear as keys of readableNames.
+function extraGCFunctions(readableNames) {
    return ["ffi_call"].filter(f => f in readableNames);
 }

@ -323,7 +323,7 @@ function isICU(name)
           name.match(/u(prv_malloc|prv_realloc|prv_free|case_toFullLower)_\d+/)
 }

-function ignoreGCFunction(mangled)
+function ignoreGCFunction(mangled, readableNames)
 {
    // Field calls will not be in readableNames
    if (!(mangled in readableNames))
--- a/js/src/devtools/rootAnalysis/computeCallgraph.js
+++ b/js/src/devtools/rootAnalysis/computeCallgraph.js
@ -15,7 +15,9 @@ if (scriptArgs[0] == '--function' || scriptArgs[0] == '-f') {
 }

 var typeInfo_filename = scriptArgs[0] || "typeInfo.txt";
-var callgraphOut_filename = scriptArgs[1] || "callgraph.txt";
+var callgraphOut_filename = scriptArgs[1] || "rawcalls.txt";
+var batch = (scriptArgs[2]|0) || 1;
+var numBatches = (scriptArgs[3]|0) || 1;

 var origOut = os.file.redirect(callgraphOut_filename);

@ -388,7 +390,10 @@ function process(functionName, functionBodies)
        printOnce(`D ${functionId("(js-code)")} ${functionId(functionName)}`);
 }

-for (var nameIndex = minStream; nameIndex <= maxStream; nameIndex++) {
+var start = batchStart(batch, numBatches, minStream, maxStream);
+var end = batchLast(batch, numBatches, minStream, maxStream);
+
+for (var nameIndex = start; nameIndex <= end; nameIndex++) {
    var name = xdb.read_key(nameIndex);
    var data = xdb.read_entry(name);
    process(name.readString(), JSON.parse(data.readString()));
--- a/js/src/devtools/rootAnalysis/computeGCFunctions.js
+++ b/js/src/devtools/rootAnalysis/computeGCFunctions.js
@ -9,24 +9,43 @@ loadRelativeToScript('utility.js');
 loadRelativeToScript('annotations.js');
 loadRelativeToScript('loadCallgraph.js');

+// Abort by throwing the command-line usage message as a string.
+function usage() {
+  throw "Usage: computeGCFunctions.js <rawcalls1.txt> <rawcalls2.txt>... --outputs <out:callgraph.txt> <out:gcFunctions.txt> <out:gcFunctions.lst> <out:gcEdges.txt> <out:limitedFunctions.lst>";
+}
+
 if (typeof scriptArgs[0] != 'string')
-    throw "Usage: computeGCFunctions.js <callgraph.txt> <out:gcFunctions.txt> <out:gcFunctions.lst> <out:gcEdges.txt> <out:limitedFunctions.lst>";
+  usage();

 var start = "Time: " + new Date;

-var callgraph_filename = scriptArgs[0];
-var gcFunctions_filename = scriptArgs[1] || "gcFunctions.txt";
-var gcFunctionsList_filename = scriptArgs[2] || "gcFunctions.lst";
-var gcEdges_filename = scriptArgs[3] || "gcEdges.txt";
+var rawcalls_filenames = [];
+while (scriptArgs.length) {
+  const arg = scriptArgs.shift();
+  if (arg == '--outputs')
+    break;
+  rawcalls_filenames.push(arg);
+}
+if (scriptArgs.length == 0)
+  usage();
+
+var callgraph_filename            = scriptArgs[0] || "callgraph.txt";
+var gcFunctions_filename          = scriptArgs[1] || "gcFunctions.txt";
+var gcFunctionsList_filename      = scriptArgs[2] || "gcFunctions.lst";
+var gcEdges_filename              = scriptArgs[3] || "gcEdges.txt";
 var limitedFunctionsList_filename = scriptArgs[4] || "limitedFunctions.lst";

-var gcFunctions = loadCallgraph(callgraph_filename);
+var {
+  gcFunctions,
+  functions,
+  calleesOf,
+  limitedFunctions
+} = loadCallgraph(rawcalls_filenames);

 printErr("Writing " + gcFunctions_filename);
 redirect(gcFunctions_filename);

 for (var name in gcFunctions) {
-    for (const readable of (readableNames[name] || [name])) {
+    for (let readable of (functions.readableName[name] || [name])) {
        print("");
        const fullname = (name == readable) ? name : name + "$" + readable;
        print("GC Function: " + fullname);
@ -35,8 +54,8 @@ for (var name in gcFunctions) {
            current = gcFunctions[current];
            if (current === 'internal')
                ; // Hit the end
-            else if (current in readableNames)
-                print("    " + readableNames[current][0]);
+            else if (current in functions.readableName)
+                print("    " + functions.readableName[current][0]);
            else
                print("    " + current);
        } while (current in gcFunctions);
@ -46,8 +65,8 @@ for (var name in gcFunctions) {
 printErr("Writing " + gcFunctionsList_filename);
 redirect(gcFunctionsList_filename);
 for (var name in gcFunctions) {
-    if (name in readableNames) {
-        for (var readable of readableNames[name])
+    if (name in functions.readableName) {
+        for (var readable of functions.readableName[name])
            print(name + "$" + readable);
    } else {
        print(name);
@ -75,3 +94,7 @@ for (var block in gcEdges) {
 printErr("Writing " + limitedFunctionsList_filename);
 redirect(limitedFunctionsList_filename);
 print(JSON.stringify(limitedFunctions, null, 4));
+
+printErr("Writing " + callgraph_filename);
+redirect(callgraph_filename);
+saveCallgraph(functions, calleesOf);
--- a/js/src/devtools/rootAnalysis/loadCallgraph.js
+++ b/js/src/devtools/rootAnalysis/loadCallgraph.js
@ -33,23 +33,16 @@ loadRelativeToScript('callgraph.js');
 // consider the mangled name. And some of the names encoded in callgraph.txt
 // are FieldCalls, not just function names.

-var readableNames = {}; // map from mangled name => list of readable names
-var limitedFunctions = {}; // set of mangled names (map from mangled name => [any,all intsets])
 var gcEdges = {};

-// "Map" from identifier to mangled name, or sometimes to a Class.Field name.
-var functionNames = [""];
-
-var mangledToId = {};
-
 // Returns whether the function was added. (It will be refused if it was
 // already there, or if attrs or annotations say it shouldn't be added.)
-function addGCFunction(caller, reason, gcFunctions, functionAttrs)
+function addGCFunction(caller, reason, gcFunctions, functionAttrs, functions)
 {
    if (functionAttrs[caller] && functionAttrs[caller][1] & ATTR_GC_SUPPRESSED)
        return false;

-    if (ignoreGCFunction(functionNames[caller]))
+    if (ignoreGCFunction(functions.name[caller], functions.readableName))
        return false;

    if (!(caller in gcFunctions)) {
@ -74,8 +67,7 @@ function generate_callgraph(rawCallees) {
    const callersOf = new Map();
    const calleesOf = new Map();

-    for (const [caller_prop, callee_attrs] of Object.entries(rawCallees)) {
-        const caller = caller_prop|0;
+    for (const [caller, callee_attrs] of rawCallees) {
        const ordered_callees = [];

        // callee_attrs is a list of {callee,any,all} objects.
@ -115,8 +107,18 @@ function generate_callgraph(rawCallees) {
 }

 // Returns object mapping mangled => reason for GCing
-function loadCallgraph(file)
+function loadRawCallgraphFile(file)
 {
+    const functions = {
+        // "Map" from identifier to mangled name, or sometimes to a Class.Field name.
+        name: [""],
+
+        // map from mangled name => list of readable names
+        readableName: {},
+
+        mangledToId: {}
+    };
+
    const fieldCallAttrs = {};
    const fieldCallCSU = new Map(); // map from full field name id => csu name

@ -126,7 +128,8 @@ function loadCallgraph(file)
    const gcCalls = [];
    const indirectCalls = [];

-    const rawCallees = {}; // map from mangled => list of tuples of {'callee':mangled, 'any':intset, 'all':intset}
+    // map from mangled => list of tuples of {'callee':mangled, 'any':intset, 'all':intset}
+    const rawCallees = new Map();

    for (let line of readFileLines_gen(file)) {
        line = line.replace(/\n/, "");
@ -134,18 +137,18 @@ function loadCallgraph(file)
        let match;
        if (match = line.charAt(0) == "#" && /^\#(\d+) (.*)/.exec(line)) {
            const [ _, id, mangled ] = match;
-            assert(functionNames.length == id);
-            functionNames.push(mangled);
-            mangledToId[mangled] = id;
+            assert(functions.name.length == id);
+            functions.name.push(mangled);
+            functions.mangledToId[mangled] = id|0;
            continue;
        }
        if (match = line.charAt(0) == "=" && /^= (\d+) (.*)/.exec(line)) {
            const [ _, id, readable ] = match;
-            const mangled = functionNames[id];
-            if (mangled in readableNames)
-                readableNames[mangled].push(readable);
+            const mangled = functions.name[id];
+            if (mangled in functions.readableName)
+                functions.readableName[mangled].push(readable);
            else
-                readableNames[mangled] = [ readable ];
+                functions.readableName[mangled] = [ readable ];
            continue;
        }

@ -165,7 +168,7 @@ function loadCallgraph(file)
        if (match = tag == 'I' && /^I (\d+) VARIABLE ([^\,]*)/.exec(line)) {
            const caller = match[1]|0;
            const name = match[2];
-            if (indirectCallCannotGC(functionNames[caller], name))
+            if (indirectCallCannotGC(functions.name[caller], name))
                attrs |= ATTR_GC_SUPPRESSED;
            indirectCalls.push([caller, "IndirectCall: " + name, attrs]);
        } else if (match = tag == 'F' && /^F (\d+) (\d+) CLASS (.*?) FIELD (.*)/.exec(line)) {
@ -173,23 +176,23 @@ function loadCallgraph(file)
            const fullfield = match[2]|0;
            const csu = match[3];
            const fullfield_str = csu + "." + match[4];
-            assert(functionNames[fullfield] == fullfield_str);
+            assert(functions.name[fullfield] == fullfield_str);
            if (attrs)
                fieldCallAttrs[fullfield] = attrs;
-            addToKeyedList(rawCallees, caller, {callee:fullfield, any:attrs, all:attrs});
+            addToMappedList(rawCallees, caller, {callee:fullfield, any:attrs, all:attrs});
            fieldCallCSU.set(fullfield, csu);

            if (fieldCallCannotGC(csu, fullfield_str))
-                addToKeyedList(rawCallees, fullfield, {callee:ID.nogcfunc, any:0, all:0});
+                addToMappedList(rawCallees, fullfield, {callee:ID.nogcfunc, any:0, all:0});
            else
-                addToKeyedList(rawCallees, fullfield, {callee:ID.anyfunc, any:0, all:0});
+                addToMappedList(rawCallees, fullfield, {callee:ID.anyfunc, any:0, all:0});
        } else if (match = tag == 'V' && /^V (\d+) (\d+) CLASS (.*?) FIELD (.*)/.exec(line)) {
            // V tag is no longer used, but we are still emitting it becasue it
            // can be helpful to understand what's going on.
        } else if (match = tag == 'D' && /^D (\d+) (\d+)/.exec(line)) {
            const caller = match[1]|0;
            const callee = match[2]|0;
-            addToKeyedList(rawCallees, caller, {callee, any:attrs, all:attrs});
+            addToMappedList(rawCallees, caller, {callee, any:attrs, all:attrs});
        } else if (match = tag == 'R' && /^R (\d+) (\d+)/.exec(line)) {
            assert(false, "R tag is no longer used");
        } else if (match = tag == 'T' && /^T (\d+) (.*)/.exec(line)) {
@ -202,17 +205,102 @@ function loadCallgraph(file)
        }
    }

-    assert(ID.jscode == mangledToId["(js-code)"]);
-    assert(ID.anyfunc == mangledToId["(any-function)"]);
-    assert(ID.nogcfunc == mangledToId["(nogc-function)"]);
-    assert(ID.gc == mangledToId["(GC)"]);
+    printErr("Loaded " + file);

-    addToKeyedList(rawCallees, mangledToId["(any-function)"], {callee:ID.gc, any:0, all:0});
+    return {
+        fieldCallAttrs,
+        fieldCallCSU,
+        gcCalls,
+        indirectCalls,
+        rawCallees,
+        functions
+    };
+}
+
+// Take a set of rawcalls filenames (as in, the raw callgraph data output by
+// computeCallgraph.js) and combine them into a global callgraph, renumbering
+// the IDs as needed.
+//
+// The data loaded from the first file is used as the accumulator and is
+// mutated in place; every later file is folded into it. Returns the
+// accumulated object, which has the same fields as the result of
+// loadRawCallgraphFile(). At least one filename must be given.
+function mergeRawCallgraphs(filenames) {
+    let d;
+    for (const filename of filenames) {
+        const raw = loadRawCallgraphFile(filename);
+        if (!d) {
+            d = raw;
+            continue;
+        }
+
+        const {
+            fieldCallAttrs,
+            fieldCallCSU,
+            gcCalls,
+            indirectCalls,
+            rawCallees,
+            functions
+        } = raw;
+
+        // Compute the ID mapping. Incoming functions that already have an ID
+        // will be mapped to that ID; new ones will allocate a fresh ID.
+        // (Index 0 of functions.name is a placeholder, so IDs start at 1 and
+        // a falsy lookup result means "not seen yet".)
+        const remap = new Array(functions.name.length);
+        for (let i = 1; i < functions.name.length; i++) {
+            const mangled = functions.name[i];
+            const old_id = d.functions.mangledToId[mangled];
+            if (old_id) {
+                remap[i] = old_id;
+            } else {
+                const newid = d.functions.name.length;
+                d.functions.mangledToId[mangled] = newid;
+                d.functions.name.push(mangled);
+                remap[i] = newid;
+                assert(!(mangled in d.functions.readableName), mangled + " readable name is already found");
+                const readables = functions.readableName[mangled];
+                if (readables !== undefined)
+                    d.functions.readableName[mangled] = readables;
+            }
+        }
+
+        // Fold the incoming file's tables into the accumulator, translating
+        // every function ID through remap.
+        for (const [fullfield, attrs] of Object.entries(fieldCallAttrs))
+            d.fieldCallAttrs[remap[fullfield]] = attrs;
+        for (const [fullfield, csu] of fieldCallCSU.entries())
+            d.fieldCallCSU.set(remap[fullfield], csu);
+        for (const call of gcCalls)
+            d.gcCalls.push(remap[call]);
+        for (const [caller, name, attrs] of indirectCalls)
+            d.indirectCalls.push([remap[caller], name, attrs]);
+        for (const [caller, callees] of rawCallees) {
+            for (const {callee, any, all} of callees) {
+                addToMappedList(d.rawCallees, remap[caller]|0, {callee:remap[callee], any, all});
+            }
+        }
+    }
+
+    // With no input files nothing was loaded. Fail here with a clear message
+    // instead of returning undefined for the caller to trip over.
+    assert(d !== undefined, "mergeRawCallgraphs: no rawcalls files given");
+
+    return d;
+}
+
+function loadCallgraph(files)
+{
+    const {
+        fieldCallAttrs,
+        fieldCallCSU,
+        gcCalls,
+        indirectCalls,
+        rawCallees,
+        functions
+    } = mergeRawCallgraphs(files);
+
+    assert(ID.jscode == functions.mangledToId["(js-code)"]);
+    assert(ID.anyfunc == functions.mangledToId["(any-function)"]);
+    assert(ID.nogcfunc == functions.mangledToId["(nogc-function)"]);
+    assert(ID.gc == functions.mangledToId["(GC)"]);
+
+    addToMappedList(rawCallees, functions.mangledToId["(any-function)"], {callee:ID.gc, any:0, all:0});

    // Compute functionAttrs: it should contain the set of functions that
    // are *always* called within some sort of limited context (eg GC
    // suppression).

+    // set of mangled names (map from mangled name => [any,all])
+    const functionAttrs = {};
+
    // Initialize to field calls with attrs set.
    for (var [name, attrs] of Object.entries(fieldCallAttrs))
        functionAttrs[name] = [attrs, attrs];
@ -223,18 +311,19 @@ function loadCallgraph(file)
    // Add in any extra functions at the end. (If we did this early, it would
    // mess up the id <-> name correspondence. Also, we need to know if the
    // functions even exist in the first place.)
-    for (var func of extraGCFunctions()) {
-        addGCFunction(mangledToId[func], "annotation", gcFunctions, functionAttrs);
+    for (var func of extraGCFunctions(functions.readableName)) {
+        addGCFunction(functions.mangledToId[func], "annotation", gcFunctions, functionAttrs, functions);
    }

    for (const func of gcCalls)
-        addToKeyedList(rawCallees, func, {callee:ID.gc, any:0, all:0});
+        addToMappedList(rawCallees, func, {callee:ID.gc, any:0, all:0});
+
    for (const [caller, indirect, attrs] of indirectCalls) {
-        const id = functionNames.length;
-        functionNames.push(indirect);
-        mangledToId[indirect] = id;
-        addToKeyedList(rawCallees, caller, {callee:id, any:attrs, all:attrs});
-        addToKeyedList(rawCallees, id, {callee:ID.anyfunc, any:0, all:0});
+        const id = functions.name.length;
+        functions.name.push(indirect);
+        functions.mangledToId[indirect] = id;
+        addToMappedList(rawCallees, caller, {callee:id, any:attrs, all:attrs});
+        addToMappedList(rawCallees, id, {callee:ID.anyfunc, any:0, all:0});
    }

    // Callers have a list of callees, with duplicates (if the same function is
@ -271,7 +360,7 @@ function loadCallgraph(file)
    //
    // Simple example: in the JS shell build, moz_xstrdup calls itself, but
    // there are no calls to it from within js/src.
-    const recursive_roots = gather_recursive_roots(functionAttrs, calleesOf, callersOf);
+    const recursive_roots = gather_recursive_roots(functionAttrs, calleesOf, callersOf, functions);

    // And do a final traversal starting with the recursive roots.
    propagate_attrs(recursive_roots, functionAttrs, calleesOf);
@ -302,7 +391,7 @@ function loadCallgraph(file)

    // Include all field calls (but not virtual method calls).
    for (const [name, csuName] of fieldCallCSU) {
-        const fullFieldName = functionNames[name];
+        const fullFieldName = functions.name[name];
        if (!fieldCallCannotGC(csuName, fullFieldName)) {
            gcFunctions[name] = 'arbitrary function pointer ' + fullFieldName;
            worklist.push(name);
@ -318,7 +407,7 @@ function loadCallgraph(file)
            continue;
        for (const [caller, {any, all}] of callersOf.get(name)) {
            if (!(all & ATTR_GC_SUPPRESSED)) {
-                if (addGCFunction(caller, name, gcFunctions, functionAttrs))
+                if (addGCFunction(caller, name, gcFunctions, functionAttrs, functions))
                    worklist.push(caller);
            }
        }
@ -327,22 +416,58 @@ function loadCallgraph(file)
    // Convert functionAttrs to limitedFunctions (using mangled names instead
    // of ids.)

+    // map from mangled name => {attributes, recursive_root:bool}
+    var limitedFunctions = {};
+
    for (const [id, [any, all]] of Object.entries(functionAttrs))
-        limitedFunctions[functionNames[id]] = { attributes: all };
+        limitedFunctions[functions.name[id]] = { attributes: all };

    for (const [id, limits, label] of recursive_roots) {
-        const name = functionNames[id];
+        const name = functions.name[id];
        const s = limitedFunctions[name] || (limitedFunctions[name] = {});
        s.recursive_root = true;
    }

-    // Remap ids to mangled names and return the gcFunctions table.
+    // Remap ids to mangled names.
    const namedGCFunctions = {};
    for (const [caller, reason] of Object.entries(gcFunctions)) {
-        namedGCFunctions[functionNames[caller]] = functionNames[reason] || reason;
+        namedGCFunctions[functions.name[caller]] = functions.name[reason] || reason;
    }

-    return namedGCFunctions;
+    return {
+        gcFunctions: namedGCFunctions,
+        functions,
+        calleesOf,
+        callersOf,
+        limitedFunctions
+    };
+}
+
+function saveCallgraph(functions, calleesOf) {
+    // Print every function id (skipping index 0) as "#<id> <name>", then one
+    // "= <id> <readable>" line for each readable name that differs from the
+    // name itself.
+    for (const [id, name] of functions.name.entries()) {
+        if (id == 0)
+            continue;
+        print(`#${id} ${name}`);
+        const readables = functions.readableName[name] || [];
+        for (const readable of readables) {
+            if (readable != name)
+                print(`= ${id} ${readable}`);
+        }
+    }
+
+    // Omit field calls for now; let them appear as if they were functions.
+
+    // Print one "D [<all>:<any> ]<caller> <callee>" line per edge. The
+    // attribute prefix is left out when both attribute values are falsy.
+    const describeAttrs = ({any, all}) => (any || all) ? `${all}:${any} ` : '';
+    for (const [caller, callees] of calleesOf) {
+        for (const [callee, attrs] of callees)
+            print(`D ${describeAttrs(attrs)}${caller} ${callee}`);
+    }
+
+    // Omit tags for now. This really should preserve all tags. The "GC Call"
+    // tag will already be represented in the graph by having an edge to the
+    // "(GC)" node.
 }

 // Return a worklist of functions with no callers, and also initialize
@ -388,7 +513,7 @@ function propagate_attrs(roots, functionAttrs, calleesOf) {

 // Mutually-recursive roots and their descendants will not have been visited,
 // and will still be set to [0, ATTRS_UNVISITED]. Scan through and gather them.
-function gather_recursive_roots(functionAttrs, calleesOf, callersOf) {
+function gather_recursive_roots(functionAttrs, calleesOf, callersOf, functions) {
    const roots = [];

    // Pick any node. Mark everything reachable by adding to a 'seen' set. At
@ -418,7 +543,6 @@ function gather_recursive_roots(functionAttrs, calleesOf, callersOf) {
            const f = work.pop();
            if (!calleesOf.has(f)) continue;
            for (const callee of calleesOf.get(f).keys()) {
-                if (!functionAttrs[callee]) debugger;
                if (!seen.has(callee) &&
                    callee != func &&
                    functionAttrs[callee][1] == ATTRS_UNVISITED)
--- a/js/src/devtools/rootAnalysis/mach_commands.py
+++ b/js/src/devtools/rootAnalysis/mach_commands.py
@ -267,8 +267,6 @@ def gather_hazard_data(command_context, **kwargs):
        sys.executable,
        os.path.join(script_dir(command_context), "analyze.py"),
        "dbs",
-        "--upto",
-        "dbs",
        "-v",
        "--buildcommand=" + buildscript,
    ]
@ -378,6 +376,7 @@ def analyze(command_context, application, shell_objdir, work_dir, extra):
        args += extra
    else:
        args += [
+            "--first",
            "gcTypes",
            "-v",
        ]
--- a/js/src/devtools/rootAnalysis/t/testlib.py
+++ b/js/src/devtools/rootAnalysis/t/testlib.py
@ -85,7 +85,7 @@ class Test(object):
        )
        return json.loads(output)

-    def run_analysis_script(self, phase, upto=None):
+    def run_analysis_script(self, startPhase, upto=None):
        open("defaults.py", "w").write(
            """\
 analysis_scriptdir = '{scriptdir}'
@ -98,10 +98,10 @@ sixgill_bin = '{bindir}'
            sys.executable,
            os.path.join(scriptdir, "analyze.py"),
            "-v" if self.verbose else "-q",
-            phase,
        ]
+        cmd += ["--first", startPhase]
        if upto:
-            cmd += ["--upto", upto]
+            cmd += ["--last", upto]
        cmd.append("--source=%s" % self.indir)
        cmd.append("--objdir=%s" % self.outdir)
        cmd.append("--js=%s" % self.cfg.js)
--- a/js/src/devtools/rootAnalysis/utility.js
+++ b/js/src/devtools/rootAnalysis/utility.js
@ -289,7 +289,56 @@ function addToKeyedList(collection, key, entry)
    return collection[key];
 }

+function addToMappedList(map, key, entry)
+{
+    // Append `entry` to the array stored under `key` in `map`, creating an
+    // empty array first if `key` is not present. Returns that array.
+    if (!map.has(key))
+        map.set(key, []);
+    const list = map.get(key);
+    list.push(entry);
+    return list;
+}
+
 function loadTypeInfo(filename)
 {
    return JSON.parse(os.file.readFile(filename));
 }
+
+// Given the inclusive range `first` .. `last`, break it down into `count`
+// contiguous batches and return the first value of the (1-based) `num`th
+// batch.
+//
+// The multiplication is done before the division so the intermediate value is
+// an exact integer. Dividing first (`(num - 1) / count * N`) can round to just
+// below a whole number (e.g. 1/49*49 === 0.9999999999999999), which would
+// leave one batch empty and push its item into the next.
+function batchStart(num, count, first, last) {
+  const N = (last - first) + 1;
+  return Math.floor((num - 1) * N / count) + first;
+}
+
+// As above, but return the last value in the (1-based) `num`th batch. Uses the
+// same multiply-then-divide form as batchStart so that
+// batchLast(k) + 1 === batchStart(k + 1) always holds.
+function batchLast(num, count, first, last) {
+  const N = (last - first) + 1;
+  return Math.floor(num * N / count) + first - 1;
+}
+
+// Debugging tool. See usage below.
+//
+// Builds a Proxy handler that executes a `debugger;` statement whenever the
+// property named `traced_prop` is assigned.
+//
+//   traced_prop: the property name to watch.
+//   check: optional object. If it has a `value` property, only assignments
+//          of a value loosely equal (==) to check.value trigger the
+//          breakpoint. If `check` is omitted, null, or has no `value`
+//          property, every assignment to `traced_prop` triggers it.
+function PropertyTracer(traced_prop, check) {
+    return {
+        // Return true if assigning `value` to `prop` should hit the breakpoint.
+        matches(prop, value) {
+            if (prop != traced_prop)
+                return false;
+            // Guard against a missing `check`: `'value' in undefined` would
+            // throw a TypeError from inside the proxy trap.
+            if (check != null && 'value' in check)
+                return value == check.value;
+            return true;
+        },
+
+        // Also called when an assignment defines a new property.
+        set(obj, prop, value) {
+            if (this.matches(prop, value))
+                debugger;
+            // Forward the original (target, prop, value, receiver) arguments.
+            return Reflect.set(...arguments);
+        },
+    };
+}
+
+// Usage: var myobj = traced({}, 'name', {value: 'Bob'})
+//
+// Wraps `obj` in a Proxy whose handler is built by PropertyTracer, so that a
+// `debugger;` statement executes when myobj['name'] is defined or set to
+// 'Bob'.
+function traced(obj, traced_prop, check) {
+  const handler = PropertyTracer(traced_prop, check);
+  return new Proxy(obj, handler);
+}