refactor chunking code and add function ident information

This commit is contained in:
Alon Zakai 2012-11-20 19:57:54 +01:00
Parent db65c00f8e
Commit a2b241e70b
2 changed files: 36 additions and 16 deletions

View file

@ -88,6 +88,7 @@ def emscript(infile, settings, outfile, libraries=[]):
# Split input into the relevant parts for each phase
pre = []
funcs = [] # split up functions here, for parallelism later
func_idents = []
meta = [] # needed by each function XXX
if DEBUG: t = time.time()
@ -95,16 +96,17 @@ def emscript(infile, settings, outfile, libraries=[]):
ll_lines = open(infile).readlines()
for line in ll_lines:
if in_func:
funcs[-1].append(line)
funcs[-1][1].append(line)
if line.startswith('}'):
in_func = False
funcs[-1] = ''.join(funcs[-1])
funcs[-1] = (funcs[-1][0], ''.join(funcs[-1][1]))
pre.append(line) # pre needs it to, so we know about all implemented functions
else:
if line.startswith(';'): continue
if line.startswith('define '):
in_func = True
funcs.append([line])
ident = shared.JS.to_nice_ident(line.split('(')[0].split(' ')[-1])
funcs.append((ident, [line]))
pre.append(line) # pre needs it to, so we know about all implemented functions
elif line.find(' = type { ') > 0:
pre.append(line) # type
@ -168,22 +170,12 @@ def emscript(infile, settings, outfile, libraries=[]):
if DEBUG: t = time.time()
forwarded_json = json.loads(forwarded_data)
indexed_functions = set()
chunks = [] # bundles of functions
curr = []
for i in range(len(funcs)):
func = funcs[i]
if len(curr) + len(func) < chunk_size:
curr.append(func)
else:
chunks.append(curr)
curr = [func]
if curr:
chunks.append(curr)
curr = None
chunks = shared.JCache.chunkify(funcs, chunk_size)
if cores == 1 and total_ll_size < MAX_CHUNK_SIZE: assert len(chunks) == 1, 'no point in splitting up without multiple cores'
if DEBUG: print >> sys.stderr, ' emscript: phase 2 working on %d chunks %s (intended chunk size: %.2f MB, meta: %.2f MB, forwarded: %.2f MB, total: %.2f MB)' % (len(chunks), ('using %d cores' % cores) if len(chunks) > 1 else '', chunk_size/(1024*1024.), len(meta)/(1024*1024.), len(forwarded_data)/(1024*1024.), total_ll_size/(1024*1024.))
commands = [(i, chunks[i], meta, settings_file, compiler, forwarded_file, libraries) for i in range(len(chunks))]
commands = [(i, [func[1] for func in chunks[i]], meta, settings_file, compiler, forwarded_file, libraries) for i in range(len(chunks))]
if len(chunks) > 1:
pool = multiprocessing.Pool(processes=cores)

View file

@ -1153,6 +1153,9 @@ class Cache:
shutil.copyfile(creator(), cachename)
return cachename
# JS-specific cache. We cache the results of compilation and optimization,
# so that in incremental builds we can just load from cache.
# We cache reasonably-large-sized chunks
class JCache:
dirname = os.path.join(Cache.dirname, 'jcache')
@ -1202,6 +1205,31 @@ class JCache:
cachename = JCache.get_cachename(shortkey)
cPickle.Pickler(open(cachename, 'wb')).dump([keys, value])
# Given a set of functions of form (ident, text), and a preferred chunk size,
# generates a set of chunks for parallel processing and caching.
# It is very important to generate similar chunks in incremental builds, in
# order to maximize the chance of cache hits.
@staticmethod
def chunkify(funcs, chunk_size):
  # funcs: list of (ident, text) tuples; chunk_size: preferred chunk size.
  # Returns a list of chunks, each a list of (ident, text) tuples.
  # NOTE: the size test mixes the *count* of functions already in the chunk
  # (len(curr)) with the *text length* of the candidate (len(func[1])); this
  # preserves the pre-refactor behavior — TODO confirm whether summed text
  # sizes were intended.
  chunks = [] # bundles of functions
  curr = []
  for func in funcs:
    if len(curr) + len(func[1]) < chunk_size:
      curr.append(func)
    else:
      # Bug fix: if the very first function alone exceeds chunk_size,
      # curr is still empty here — do not emit an empty chunk (it would
      # become a useless parallel compile job downstream).
      if curr:
        chunks.append(curr)
      curr = [func]
  if curr:
    chunks.append(curr)
  return chunks
class JS:
  @staticmethod
  def to_nice_ident(ident): # limited version of the JS function toNiceIdent
    # Sanitize an LLVM identifier for use as a JS name by mapping the
    # characters JS cannot accept: '%' becomes '$' and '@' becomes '_'.
    result = ident
    for bad, good in (('%', '$'), ('@', '_')):
      result = result.replace(bad, good)
    return result
# Compression of code and data for smaller downloads
class Compression:
on = False