diff --git a/tools/emterpretify.py b/tools/emterpretify.py
index 6768ff4d9..f76a8b201 100755
--- a/tools/emterpretify.py
+++ b/tools/emterpretify.py
@@ -537,10 +537,11 @@ asm = asm_module.AsmModule(infile)
 # decide which functions will be emterpreted
 
 emterpreted_funcs = [func for func in asm.funcs if func not in BLACKLIST and not func.startswith('dynCall_')]
+exported_emterpreted_funcs = filter(lambda func: func in emterpreted_funcs, [func.split(':')[0] for func in asm.exports])
 
 # process functions, generating bytecode
 temp = infile + '.tmp.js'
-shared.Building.js_optimizer(infile, ['emterpretify'], extra_info={ 'emterpretedFuncs': list(emterpreted_funcs), 'opcodes': OPCODES, 'ropcodes': ROPCODES }, output_filename=temp)
+shared.Building.js_optimizer(infile, ['emterpretify'], extra_info={ 'emterpretedFuncs': list(emterpreted_funcs), 'exportedEmterpretedFuncs': list(exported_emterpreted_funcs), 'opcodes': OPCODES, 'ropcodes': ROPCODES }, output_filename=temp, just_concat=True)
 
 # load the module and modify it
 asm = asm_module.AsmModule(temp)
@@ -660,23 +661,20 @@ for i in range(len(lines)):
   line = lines[i]
   if line.startswith('function ') and '}' not in line:
     assert not func
-    func = line.split(' ')[1].split('(')[0]
-  elif line.startswith('}'):
-    assert func
+  elif line.startswith('// EMTERPRET_INFO '):
     try:
-      curr, absolute_targets = json.loads(line[4:])
-    except:
-      if '[' in line: print >> sys.stderr, 'failed to parse code from', line
-      curr = None
-    if curr is not None:
-      assert len(curr) % 4 == 0, curr
-      funcs[func] = len(all_code) # no operation here should change the length
-      if LOG_CODE: print >> sys.stderr, 'raw bytecode for %s:' % func, curr, 'insts:', len(curr)/4
-      process_code(func, curr, absolute_targets)
-      #print >> sys.stderr, 'processed bytecode for %s:' % func, curr
-      all_code += curr
+      func, curr, absolute_targets = json.loads(line[len('// EMTERPRET_INFO '):])
+    except Exception, e:
+      print >> sys.stderr, 'failed to parse code from', line
+      raise e
+    assert len(curr) % 4 == 0, curr
+    funcs[func] = len(all_code) # no operation here should change the length
+    if LOG_CODE: print >> sys.stderr, 'raw bytecode for %s:' % func, curr, 'insts:', len(curr)/4
+    process_code(func, curr, absolute_targets)
+    #print >> sys.stderr, 'processed bytecode for %s:' % func, curr
+    all_code += curr
     func = None
-    lines[i] = '}'
+    lines[i] = ''
   elif line.startswith('// return type: ['):
     name, ret = line.split('[')[1].split(']')[0].split(',')
     if ret == 'undefined':
diff --git a/tools/js-optimizer.js b/tools/js-optimizer.js
index 8c524836b..09c623d56 100644
--- a/tools/js-optimizer.js
+++ b/tools/js-optimizer.js
@@ -5974,6 +5974,7 @@ function emterpretify(ast) {
   emitAst = false;
 
   var EMTERPRETED_FUNCS = set(extraInfo.emterpretedFuncs);
+  var EXPORTED_EMTERPRETED_FUNCS = set(extraInfo.exportedEmterpretedFuncs);
   var OPCODES = extraInfo.opcodes;
   var ROPCODES = extraInfo.ropcodes;
 
@@ -7235,64 +7236,67 @@ function emterpretify(ast) {
     var zero = leaf; // TODO: heuristics
     var onlyLeavesAreZero = true; // if only leaves are zero, then we do not need to save and restore the stack XXX if this is not true, then setjmp and exceptions can fail, as cleanup is skipped!
 
-    // set up trampoline
-    asmData.vars = {};
-    if (zero && !onlyLeavesAreZero) {
-      // emterpreters run using the stack starting at 0. we must copy it so we can restore it later
-      asmData.vars['sp'] = ASM_INT;
-      func[3].push(srcToStat('sp = EMTSTACKTOP;'));
-      var stackBytes = finalLocals*8;
-      func[3].push(srcToStat('EMTSTACKTOP = EMTSTACKTOP + ' + stackBytes + ' | 0;'));
-      func[3].push(srcToStat('assert(((EMTSTACKTOP|0) <= (EMT_STACK_MAX|0))|0);'));
-      asmData.vars['x'] = ASM_INT;
-      func[3].push(srcToStat('while ((x | 0) < ' + stackBytes + ') { HEAP32[sp + x >> 2] = HEAP32[x >> 2] | 0; x = x + 4 | 0; }'));
-    }
-    // copy our arguments to our stack frame
-    var bump = 0; // we will assert in the emterpreter itself that we did not overflow the emtstack
-    func[2].forEach(function(arg) {
-      var code;
-      switch (asmData.params[arg]) {
-        case ASM_INT:    code = 'HEAP32[' + (zero ? (bump >> 2) : ('EMTSTACKTOP + ' + bump + ' >> 2')) + '] = ' + arg + ';'; break;
-        case ASM_DOUBLE: code = 'HEAPF64[' + (zero ? (bump >> 3) : ('EMTSTACKTOP + ' + bump + ' >> 3')) + '] = ' + arg + ';'; break;
-        case ASM_FLOAT:  code = 'HEAPF32[' + (zero ? (bump >> 2) : ('EMTSTACKTOP + ' + bump + ' >> 2')) + '] = ' + arg + ';'; break;
-        default: throw 'bad';
+    if (1) { // TODO: presumably this should become (func[1] in EXPORTED_EMTERPRETED_FUNCS); for now a trampoline is emitted for every function that reaches this point
+      // set up trampoline
+      asmData.vars = {};
+      if (zero && !onlyLeavesAreZero) {
+        // emterpreters run using the stack starting at 0. we must copy it so we can restore it later
+        asmData.vars['sp'] = ASM_INT;
+        func[3].push(srcToStat('sp = EMTSTACKTOP;'));
+        var stackBytes = finalLocals*8;
+        func[3].push(srcToStat('EMTSTACKTOP = EMTSTACKTOP + ' + stackBytes + ' | 0;'));
+        func[3].push(srcToStat('assert(((EMTSTACKTOP|0) <= (EMT_STACK_MAX|0))|0);'));
+        asmData.vars['x'] = ASM_INT;
+        func[3].push(srcToStat('while ((x | 0) < ' + stackBytes + ') { HEAP32[sp + x >> 2] = HEAP32[x >> 2] | 0; x = x + 4 | 0; }'));
       }
-      func[3].push(srcToStat(code));
-      bump += 8; // each local is a 64-bit value
-    });
-    // prepare the call into the emterpreter
-    var theName = ['name', 'emterpret'];
-    var theCall = ['call', theName, [['name', 'EMTERPRETER_' + func[1]]]]; // EMTERPRETER_* will be replaced with the absolute bytecode offset later
-    // add a return if necessary
-    if (asmData.ret !== undefined) {
-      switch (asmData.ret) {
-        case ASM_INT:    theName[1] += '_i'; break;
-        case ASM_DOUBLE: theName[1] += '_d'; break;
-        default: throw 'bad';
+      // copy our arguments to our stack frame
+      var bump = 0; // we will assert in the emterpreter itself that we did not overflow the emtstack
+      func[2].forEach(function(arg) {
+        var code;
+        switch (asmData.params[arg]) {
+          case ASM_INT:    code = 'HEAP32[' + (zero ? (bump >> 2) : ('EMTSTACKTOP + ' + bump + ' >> 2')) + '] = ' + arg + ';'; break;
+          case ASM_DOUBLE: code = 'HEAPF64[' + (zero ? (bump >> 3) : ('EMTSTACKTOP + ' + bump + ' >> 3')) + '] = ' + arg + ';'; break;
+          case ASM_FLOAT:  code = 'HEAPF32[' + (zero ? (bump >> 2) : ('EMTSTACKTOP + ' + bump + ' >> 2')) + '] = ' + arg + ';'; break;
+          default: throw 'bad';
+        }
+        func[3].push(srcToStat(code));
+        bump += 8; // each local is a 64-bit value
+      });
+      // prepare the call into the emterpreter
+      var theName = ['name', 'emterpret'];
+      var theCall = ['call', theName, [['name', 'EMTERPRETER_' + func[1]]]]; // EMTERPRETER_* will be replaced with the absolute bytecode offset later
+      // add a return if necessary
+      if (asmData.ret !== undefined) {
+        switch (asmData.ret) {
+          case ASM_INT:    theName[1] += '_i'; break;
+          case ASM_DOUBLE: theName[1] += '_d'; break;
+          default: throw 'bad';
+        }
+        asmData.vars['ret'] = asmData.ret;
+        func[3].push(['stat', ['assign', true, ['name', 'ret'], makeAsmCoercion(theCall, asmData.ret)]]);
+      } else {
+        theName[1] += '_i'; // void funcs reuse _i, and ignore the return value
+        func[3].push(['stat', makeAsmCoercion(theCall, ASM_INT)]);
       }
-      asmData.vars['ret'] = asmData.ret;
-      func[3].push(['stat', ['assign', true, ['name', 'ret'], makeAsmCoercion(theCall, asmData.ret)]]);
-    } else {
-      theName[1] += '_i'; // void funcs reuse _i, and ignore the return value
-      func[3].push(['stat', makeAsmCoercion(theCall, ASM_INT)]);
-    }
-    if (zero) {
-      theName[1] += '_z';
-      if (!onlyLeavesAreZero) {
-        // restore the stack
-        func[3].push(srcToStat('x = 0;'));
-        func[3].push(srcToStat('while ((x | 0) < ' + stackBytes + ') { HEAP32[x >> 2] = HEAP32[sp + x >> 2] | 0; x = x + 4 | 0; }'));
-        func[3].push(srcToStat('EMTSTACKTOP = sp;'));
+      if (zero) {
+        theName[1] += '_z';
+        if (!onlyLeavesAreZero) {
+          // restore the stack
+          func[3].push(srcToStat('x = 0;'));
+          func[3].push(srcToStat('while ((x | 0) < ' + stackBytes + ') { HEAP32[x >> 2] = HEAP32[sp + x >> 2] | 0; x = x + 4 | 0; }'));
+          func[3].push(srcToStat('EMTSTACKTOP = sp;'));
+        }
       }
+      // add the return
+      if (asmData.ret !== undefined) {
+        func[3].push(['return', makeAsmCoercion(['name', 'ret'], asmData.ret)]);
+      }
+      // emit trampoline and bytecode
+      denormalizeAsm(func, asmData);
+      prepDotZero(func);
+      print(fixDotZero(astToSrc(func)));
     }
-    // add the return
-    if (asmData.ret !== undefined) {
-      func[3].push(['return', makeAsmCoercion(['name', 'ret'], asmData.ret)]);
-    }
-    // emit trampoline and bytecode
-    denormalizeAsm(func, asmData);
-    prepDotZero(func);
-    print(fixDotZero(astToSrc(func)) + ' //' + JSON.stringify([code, absoluteTargets]));
+    print('// EMTERPRET_INFO ' + JSON.stringify([func[1], code, absoluteTargets]));
   }
   traverseGeneratedFunctions(ast, walkFunction);
 }
diff --git a/tools/js_optimizer.py b/tools/js_optimizer.py
index 396184acd..083504ecc 100644
--- a/tools/js_optimizer.py
+++ b/tools/js_optimizer.py
@@ -100,7 +100,7 @@ def run_on_chunk(command):
     # avoid throwing keyboard interrupts from a child process
     raise Exception()
 
-def run_on_js(filename, passes, js_engine, jcache, source_map=False, extra_info=None):
+def run_on_js(filename, passes, js_engine, jcache, source_map=False, extra_info=None, just_concat=False):
   if isinstance(jcache, bool) and jcache: jcache = shared.JCache
   if jcache: shared.JCache.ensure()
 
@@ -327,28 +327,34 @@ EMSCRIPTEN_FUNCS();
   f.write(pre);
   pre = None
 
-  # sort functions by size, to make diffing easier and to improve aot times
-  funcses = []
-  for out_file in filenames:
-    funcses.append(split_funcs(open(out_file).read()))
-  funcs = [item for sublist in funcses for item in sublist]
-  funcses = None
-  def sorter(x, y):
-    diff = len(y[1]) - len(x[1])
-    if diff != 0: return diff
-    if x[0] < y[0]: return 1
-    elif x[0] > y[0]: return -1
-    return 0
-  funcs.sort(sorter)
+  if not just_concat:
+    # sort functions by size, to make diffing easier and to improve aot times
+    funcses = []
+    for out_file in filenames:
+      funcses.append(split_funcs(open(out_file).read()))
+    funcs = [item for sublist in funcses for item in sublist]
+    funcses = None
+    def sorter(x, y):
+      diff = len(y[1]) - len(x[1])
+      if diff != 0: return diff
+      if x[0] < y[0]: return 1
+      elif x[0] > y[0]: return -1
+      return 0
+    funcs.sort(sorter)
 
-  if 'last' in passes and len(funcs) > 0:
-    count = funcs[0][1].count('\n')
-    if count > 3000:
-      print >> sys.stderr, 'warning: Output contains some very large functions (%s lines in %s), consider building source files with -Os or -Oz, and/or trying OUTLINING_LIMIT to break them up (see settings.js; note that the parameter there affects AST nodes, while we measure lines here, so the two may not match up)' % (count, funcs[0][0])
+    if 'last' in passes and len(funcs) > 0:
+      count = funcs[0][1].count('\n')
+      if count > 3000:
+        print >> sys.stderr, 'warning: Output contains some very large functions (%s lines in %s), consider building source files with -Os or -Oz, and/or trying OUTLINING_LIMIT to break them up (see settings.js; note that the parameter there affects AST nodes, while we measure lines here, so the two may not match up)' % (count, funcs[0][0])
 
-  for func in funcs:
-    f.write(func[1])
-  funcs = None
+    for func in funcs:
+      f.write(func[1])
+    funcs = None
+  else:
+    # just concat the outputs
+    for out_file in filenames:
+      f.write(open(out_file).read())
+    assert not jcache
   f.write('\n')
   if jcache:
     for cached in cached_outputs:
@@ -370,9 +376,9 @@ EMSCRIPTEN_FUNCS();
 
   return filename
 
-def run(filename, passes, js_engine=shared.NODE_JS, jcache=False, source_map=False, extra_info=None):
+def run(filename, passes, js_engine=shared.NODE_JS, jcache=False, source_map=False, extra_info=None, just_concat=False):
   js_engine = shared.listify(js_engine)
-  return temp_files.run_and_clean(lambda: run_on_js(filename, passes, js_engine, jcache, source_map, extra_info))
+  return temp_files.run_and_clean(lambda: run_on_js(filename, passes, js_engine, jcache, source_map, extra_info, just_concat))
 
 if __name__ == '__main__':
   last = sys.argv[-1]
diff --git a/tools/shared.py b/tools/shared.py
index 8ec756847..c95cf6d36 100644
--- a/tools/shared.py
+++ b/tools/shared.py
@@ -1588,8 +1588,8 @@ class Building:
     return opts
 
   @staticmethod
-  def js_optimizer(filename, passes, jcache=False, debug=False, extra_info=None, output_filename=None):
-    ret = js_optimizer.run(filename, passes, listify(NODE_JS), jcache, debug, extra_info)
+  def js_optimizer(filename, passes, jcache=False, debug=False, extra_info=None, output_filename=None, just_concat=False):
+    ret = js_optimizer.run(filename, passes, listify(NODE_JS), jcache, debug, extra_info, just_concat)
     if output_filename:
       safe_move(ret, output_filename)
       ret = output_filename