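Refactor emterpretify so trampolines can be made optional: emit each function's bytecode on a separate `// EMTERPRET_INFO` line (instead of a trailing comment on the trampoline) and add a `just_concat` option to the JS optimizer so those lines are preserved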

2014-10-06 16:37:00 -07:00 · 2014-10-06 16:37:00 -07:00 · 520c85124f
--- a/tools/emterpretify.py
+++ b/tools/emterpretify.py
@@ -537,10 +537,11 @@ asm = asm_module.AsmModule(infile)
 # decide which functions will be emterpreted

 emterpreted_funcs = [func for func in asm.funcs if func not in BLACKLIST and not func.startswith('dynCall_')]
+exported_emterpreted_funcs = filter(lambda func: func in emterpreted_funcs, [func.split(':')[0] for func in asm.exports])

 # process functions, generating bytecode
 temp = infile + '.tmp.js'
-shared.Building.js_optimizer(infile, ['emterpretify'], extra_info={ 'emterpretedFuncs': list(emterpreted_funcs), 'opcodes': OPCODES, 'ropcodes': ROPCODES }, output_filename=temp)
+shared.Building.js_optimizer(infile, ['emterpretify'], extra_info={ 'emterpretedFuncs': list(emterpreted_funcs), 'exportedEmterpretedFuncs': list(exported_emterpreted_funcs), 'opcodes': OPCODES, 'ropcodes': ROPCODES }, output_filename=temp, just_concat=True)

 # load the module and modify it
 asm = asm_module.AsmModule(temp)
@@ -660,23 +661,20 @@ for i in range(len(lines)):
  line = lines[i]
  if line.startswith('function ') and '}' not in line:
    assert not func
-    func = line.split(' ')[1].split('(')[0]
-  elif line.startswith('}'):
-    assert func
+  elif line.startswith('// EMTERPRET_INFO '):
    try:
-      curr, absolute_targets = json.loads(line[4:])
-    except:
-      if '[' in line: print >> sys.stderr, 'failed to parse code from', line
-      curr = None
-    if curr is not None:
-      assert len(curr) % 4 == 0, curr
-      funcs[func] = len(all_code) # no operation here should change the length
-      if LOG_CODE: print >> sys.stderr, 'raw bytecode for %s:' % func, curr, 'insts:', len(curr)/4
-      process_code(func, curr, absolute_targets)
-      #print >> sys.stderr, 'processed bytecode for %s:' % func, curr
-      all_code += curr
+      func, curr, absolute_targets = json.loads(line[len('// EMTERPRET_INFO '):])
+    except Exception, e:
+      print >> sys.stderr, 'failed to parse code from', line
+      raise e
+    assert len(curr) % 4 == 0, curr
+    funcs[func] = len(all_code) # no operation here should change the length
+    if LOG_CODE: print >> sys.stderr, 'raw bytecode for %s:' % func, curr, 'insts:', len(curr)/4
+    process_code(func, curr, absolute_targets)
+    #print >> sys.stderr, 'processed bytecode for %s:' % func, curr
+    all_code += curr
    func = None
-    lines[i] = '}'
+    lines[i] = ''
  elif line.startswith('// return type: ['):
    name, ret = line.split('[')[1].split(']')[0].split(',')
    if ret == 'undefined':
--- a/tools/js-optimizer.js
+++ b/tools/js-optimizer.js
@@ -5974,6 +5974,7 @@ function emterpretify(ast) {
  emitAst = false;

  var EMTERPRETED_FUNCS = set(extraInfo.emterpretedFuncs);
+  var EXPORTED_EMTERPRETED_FUNCS = set(extraInfo.exportedEmterpretedFuncs);
  var OPCODES = extraInfo.opcodes;
  var ROPCODES = extraInfo.ropcodes;

@@ -7235,64 +7236,67 @@ function emterpretify(ast) {
    var zero = leaf; // TODO: heuristics
    var onlyLeavesAreZero = true; // if only leaves are zero, then we do not need to save and restore the stack XXX if this is not true, then setjmp and exceptions can fail, as cleanup is skipped!

-    // set up trampoline
-    asmData.vars = {};
-    if (zero && !onlyLeavesAreZero) {
-      // emterpreters run using the stack starting at 0. we must copy it so we can restore it later
-      asmData.vars['sp'] = ASM_INT;
-      func[3].push(srcToStat('sp = EMTSTACKTOP;'));
-      var stackBytes = finalLocals*8;
-      func[3].push(srcToStat('EMTSTACKTOP = EMTSTACKTOP + ' + stackBytes + ' | 0;'));
-      func[3].push(srcToStat('assert(((EMTSTACKTOP|0) <= (EMT_STACK_MAX|0))|0);'));
-      asmData.vars['x'] = ASM_INT;
-      func[3].push(srcToStat('while ((x | 0) < ' + stackBytes + ') { HEAP32[sp + x >> 2] = HEAP32[x >> 2] | 0; x = x + 4 | 0; }'));
-    }
-    // copy our arguments to our stack frame
-    var bump = 0; // we will assert in the emterpreter itself that we did not overflow the emtstack
-    func[2].forEach(function(arg) {
-      var code;
-      switch (asmData.params[arg]) {
-        case ASM_INT:    code = 'HEAP32[' + (zero ? (bump >> 2) : ('EMTSTACKTOP + ' + bump + ' >> 2')) + '] = ' + arg + ';'; break;
-        case ASM_DOUBLE: code = 'HEAPF64[' + (zero ? (bump >> 3) : ('EMTSTACKTOP + ' + bump + ' >> 3')) + '] = ' + arg + ';'; break;
-        case ASM_FLOAT:  code = 'HEAPF32[' + (zero ? (bump >> 2) : ('EMTSTACKTOP + ' + bump + ' >> 2')) + '] = ' + arg + ';'; break;
-        default: throw 'bad';
+    if (1) { //func[1] in EXPORTED_EMTERPRETED_FUNCS) {
+      // set up trampoline
+      asmData.vars = {};
+      if (zero && !onlyLeavesAreZero) {
+        // emterpreters run using the stack starting at 0. we must copy it so we can restore it later
+        asmData.vars['sp'] = ASM_INT;
+        func[3].push(srcToStat('sp = EMTSTACKTOP;'));
+        var stackBytes = finalLocals*8;
+        func[3].push(srcToStat('EMTSTACKTOP = EMTSTACKTOP + ' + stackBytes + ' | 0;'));
+        func[3].push(srcToStat('assert(((EMTSTACKTOP|0) <= (EMT_STACK_MAX|0))|0);'));
+        asmData.vars['x'] = ASM_INT;
+        func[3].push(srcToStat('while ((x | 0) < ' + stackBytes + ') { HEAP32[sp + x >> 2] = HEAP32[x >> 2] | 0; x = x + 4 | 0; }'));
      }
-      func[3].push(srcToStat(code));
-      bump += 8; // each local is a 64-bit value
-    });
-    // prepare the call into the emterpreter
-    var theName = ['name', 'emterpret'];
-    var theCall = ['call', theName, [['name', 'EMTERPRETER_' + func[1]]]]; // EMTERPRETER_* will be replaced with the absolute bytecode offset later
-    // add a return if necessary
-    if (asmData.ret !== undefined) {
-      switch (asmData.ret) {
-        case ASM_INT:    theName[1] += '_i'; break;
-        case ASM_DOUBLE: theName[1] += '_d'; break;
-        default: throw 'bad';
+      // copy our arguments to our stack frame
+      var bump = 0; // we will assert in the emterpreter itself that we did not overflow the emtstack
+      func[2].forEach(function(arg) {
+        var code;
+        switch (asmData.params[arg]) {
+          case ASM_INT:    code = 'HEAP32[' + (zero ? (bump >> 2) : ('EMTSTACKTOP + ' + bump + ' >> 2')) + '] = ' + arg + ';'; break;
+          case ASM_DOUBLE: code = 'HEAPF64[' + (zero ? (bump >> 3) : ('EMTSTACKTOP + ' + bump + ' >> 3')) + '] = ' + arg + ';'; break;
+          case ASM_FLOAT:  code = 'HEAPF32[' + (zero ? (bump >> 2) : ('EMTSTACKTOP + ' + bump + ' >> 2')) + '] = ' + arg + ';'; break;
+          default: throw 'bad';
+        }
+        func[3].push(srcToStat(code));
+        bump += 8; // each local is a 64-bit value
+      });
+      // prepare the call into the emterpreter
+      var theName = ['name', 'emterpret'];
+      var theCall = ['call', theName, [['name', 'EMTERPRETER_' + func[1]]]]; // EMTERPRETER_* will be replaced with the absolute bytecode offset later
+      // add a return if necessary
+      if (asmData.ret !== undefined) {
+        switch (asmData.ret) {
+          case ASM_INT:    theName[1] += '_i'; break;
+          case ASM_DOUBLE: theName[1] += '_d'; break;
+          default: throw 'bad';
+        }
+        asmData.vars['ret'] = asmData.ret;
+        func[3].push(['stat', ['assign', true, ['name', 'ret'], makeAsmCoercion(theCall, asmData.ret)]]);
+      } else {
+        theName[1] += '_i'; // void funcs reuse _i, and ignore the return value
+        func[3].push(['stat', makeAsmCoercion(theCall, ASM_INT)]);
      }
-      asmData.vars['ret'] = asmData.ret;
-      func[3].push(['stat', ['assign', true, ['name', 'ret'], makeAsmCoercion(theCall, asmData.ret)]]);
-    } else {
-      theName[1] += '_i'; // void funcs reuse _i, and ignore the return value
-      func[3].push(['stat', makeAsmCoercion(theCall, ASM_INT)]);
-    }
-    if (zero) {
-      theName[1] += '_z';
-      if (!onlyLeavesAreZero) {
-        // restore the stack
-        func[3].push(srcToStat('x = 0;'));
-        func[3].push(srcToStat('while ((x | 0) < ' + stackBytes + ') { HEAP32[x >> 2] = HEAP32[sp + x >> 2] | 0; x = x + 4 | 0; }'));
-        func[3].push(srcToStat('EMTSTACKTOP = sp;'));
+      if (zero) {
+        theName[1] += '_z';
+        if (!onlyLeavesAreZero) {
+          // restore the stack
+          func[3].push(srcToStat('x = 0;'));
+          func[3].push(srcToStat('while ((x | 0) < ' + stackBytes + ') { HEAP32[x >> 2] = HEAP32[sp + x >> 2] | 0; x = x + 4 | 0; }'));
+          func[3].push(srcToStat('EMTSTACKTOP = sp;'));
+        }
      }
+      // add the return
+      if (asmData.ret !== undefined) {
+        func[3].push(['return', makeAsmCoercion(['name', 'ret'], asmData.ret)]);
+      }
+      // emit trampoline and bytecode
+      denormalizeAsm(func, asmData);
+      prepDotZero(func);
+      print(fixDotZero(astToSrc(func)));
    }
-    // add the return
-    if (asmData.ret !== undefined) {
-      func[3].push(['return', makeAsmCoercion(['name', 'ret'], asmData.ret)]);
-    }
-    // emit trampoline and bytecode
-    denormalizeAsm(func, asmData);
-    prepDotZero(func);
-    print(fixDotZero(astToSrc(func)) + ' //' + JSON.stringify([code, absoluteTargets]));
+    print('// EMTERPRET_INFO ' + JSON.stringify([func[1], code, absoluteTargets]));
  }
  traverseGeneratedFunctions(ast, walkFunction);
 }
--- a/tools/js_optimizer.py
+++ b/tools/js_optimizer.py
@@ -100,7 +100,7 @@ def run_on_chunk(command):
    # avoid throwing keyboard interrupts from a child process
    raise Exception()

-def run_on_js(filename, passes, js_engine, jcache, source_map=False, extra_info=None):
+def run_on_js(filename, passes, js_engine, jcache, source_map=False, extra_info=None, just_concat=False):
  if isinstance(jcache, bool) and jcache: jcache = shared.JCache
  if jcache: shared.JCache.ensure()

@@ -327,28 +327,34 @@ EMSCRIPTEN_FUNCS();
  f.write(pre);
  pre = None

-  # sort functions by size, to make diffing easier and to improve aot times
-  funcses = []
-  for out_file in filenames:
-    funcses.append(split_funcs(open(out_file).read()))
-  funcs = [item for sublist in funcses for item in sublist]
-  funcses = None
-  def sorter(x, y):
-    diff = len(y[1]) - len(x[1])
-    if diff != 0: return diff
-    if x[0] < y[0]: return 1
-    elif x[0] > y[0]: return -1
-    return 0
-  funcs.sort(sorter)
+  if not just_concat:
+    # sort functions by size, to make diffing easier and to improve aot times
+    funcses = []
+    for out_file in filenames:
+      funcses.append(split_funcs(open(out_file).read()))
+    funcs = [item for sublist in funcses for item in sublist]
+    funcses = None
+    def sorter(x, y):
+      diff = len(y[1]) - len(x[1])
+      if diff != 0: return diff
+      if x[0] < y[0]: return 1
+      elif x[0] > y[0]: return -1
+      return 0
+    funcs.sort(sorter)

-  if 'last' in passes and len(funcs) > 0:
-    count = funcs[0][1].count('\n')
-    if count > 3000:
-      print >> sys.stderr, 'warning: Output contains some very large functions (%s lines in %s), consider building source files with -Os or -Oz, and/or trying OUTLINING_LIMIT to break them up (see settings.js; note that the parameter there affects AST nodes, while we measure lines here, so the two may not match up)' % (count, funcs[0][0])
+    if 'last' in passes and len(funcs) > 0:
+      count = funcs[0][1].count('\n')
+      if count > 3000:
+        print >> sys.stderr, 'warning: Output contains some very large functions (%s lines in %s), consider building source files with -Os or -Oz, and/or trying OUTLINING_LIMIT to break them up (see settings.js; note that the parameter there affects AST nodes, while we measure lines here, so the two may not match up)' % (count, funcs[0][0])

-  for func in funcs:
-    f.write(func[1])
-  funcs = None
+    for func in funcs:
+      f.write(func[1])
+    funcs = None
+  else:
+    # just concat the outputs
+    for out_file in filenames:
+      f.write(open(out_file).read())
+    assert not jcache
  f.write('\n')
  if jcache:
    for cached in cached_outputs:
@@ -370,9 +376,9 @@ EMSCRIPTEN_FUNCS();

  return filename

-def run(filename, passes, js_engine=shared.NODE_JS, jcache=False, source_map=False, extra_info=None):
+def run(filename, passes, js_engine=shared.NODE_JS, jcache=False, source_map=False, extra_info=None, just_concat=False):
  js_engine = shared.listify(js_engine)
-  return temp_files.run_and_clean(lambda: run_on_js(filename, passes, js_engine, jcache, source_map, extra_info))
+  return temp_files.run_and_clean(lambda: run_on_js(filename, passes, js_engine, jcache, source_map, extra_info, just_concat))

 if __name__ == '__main__':
  last = sys.argv[-1]
--- a/tools/shared.py
+++ b/tools/shared.py
@@ -1588,8 +1588,8 @@ class Building:
    return opts

  @staticmethod
-  def js_optimizer(filename, passes, jcache=False, debug=False, extra_info=None, output_filename=None):
-    ret = js_optimizer.run(filename, passes, listify(NODE_JS), jcache, debug, extra_info)
+  def js_optimizer(filename, passes, jcache=False, debug=False, extra_info=None, output_filename=None, just_concat=False):
+    ret = js_optimizer.run(filename, passes, listify(NODE_JS), jcache, debug, extra_info, just_concat)
    if output_filename:
      safe_move(ret, output_filename)
      ret = output_filename