Make chunkify a free function
Parent: 8e63a929d4
Commit: a804dfb628
@@ -189,7 +189,9 @@ def emscript(configuration, infile, settings, outfile, libraries=[],
     settings['EXPORTED_FUNCTIONS'] = forwarded_json['EXPORTED_FUNCTIONS']
     save_settings()

-  chunks = shared.JCache.chunkify(funcs, chunk_size, 'emscript_files' if jcache else None)
+  chunks = shared.chunkify(
+      funcs, chunk_size,
+      jcache.get_cachename('emscript_files') if jcache else None)

   if jcache:
     # load chunks from cache where we can # TODO: ignore small chunks
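The call-site change above is the visible half of the refactor: chunkify no longer resolves a short label into a cache path itself, so the caller passes either a fully resolved filename (obtained from the cache object's get_cachename) or None. A minimal, self-contained sketch of that contract, using a hypothetical FakeCache and plan_chunking as stand-ins rather than emscripten's real shared module:

import os

# Hypothetical stand-in for the jcache object; only get_cachename() matters here.
class FakeCache:
    def __init__(self, dirname):
        self.dirname = dirname
    def get_cachename(self, shortname):
        # resolve a short label like 'emscript_files' to a file inside the cache dir
        return os.path.join(self.dirname, shortname)

def plan_chunking(funcs, chunk_size, chunking_file):
    # stand-in for shared.chunkify: it receives a ready path (or None)
    # and no longer needs to know anything about the cache layout
    return 'would chunk %d funcs, state kept in %r' % (len(funcs), chunking_file)

jcache = FakeCache('/tmp/emscripten_cache')  # or None when jcache is disabled
funcs = [('f1', 'function f1(){}'), ('f2', 'function f2(){}')]
print(plan_chunking(funcs, 1024,
                    jcache.get_cachename('emscript_files') if jcache else None))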
@@ -106,7 +106,7 @@ def run_on_js(filename, passes, js_engine, jcache):
   intended_num_chunks = int(round(cores * NUM_CHUNKS_PER_CORE))
   chunk_size = min(MAX_CHUNK_SIZE, max(MIN_CHUNK_SIZE, total_size / intended_num_chunks))

-  chunks = shared.JCache.chunkify(funcs, chunk_size, 'jsopt' if jcache else None)
+  chunks = shared.chunkify(funcs, chunk_size, jcache.get_cachename('jsopt') if jcache else None)

   if jcache:
     # load chunks from cache where we can # TODO: ignore small chunks
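For context on the chunk_size being passed in: the total payload is split into roughly NUM_CHUNKS_PER_CORE chunks per core, then clamped to a minimum and maximum chunk size. A small sketch of that heuristic; the constants below are illustrative, not emscripten's actual values:

NUM_CHUNKS_PER_CORE = 2
MIN_CHUNK_SIZE = 512 * 1024
MAX_CHUNK_SIZE = 4 * 1024 * 1024

def pick_chunk_size(total_size, cores):
    intended_num_chunks = int(round(cores * NUM_CHUNKS_PER_CORE))
    # clamp: never below MIN_CHUNK_SIZE, never above MAX_CHUNK_SIZE
    return min(MAX_CHUNK_SIZE, max(MIN_CHUNK_SIZE, total_size // intended_num_chunks))

print(pick_chunk_size(total_size=10 * 1024 * 1024, cores=4))  # 1310720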
tools/shared.py (220 changed lines)
@@ -1249,15 +1249,7 @@ class Cache:

   @classmethod
   def erase(self):
-    try:
-      shutil.rmtree(self.dirname)
-    except:
-      pass
-    try_delete(RELOOPER)
-    try:
-      open(Cache.dirname + '__last_clear', 'w').write('last clear: ' + time.asctime() + '\n')
-    except:
-      print >> sys.stderr, 'failed to save last clear time'
+    shutil.rmtree(self.dirname, ignore_errors=True)

   @classmethod
   def get_path(self, shortname):
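The erase change above collapses the try/except around rmtree into one call with ignore_errors=True (and drops the RELOOPER cleanup and the __last_clear marker write). A minimal sketch of why ignore_errors=True is enough for the rmtree part:

import os, shutil, tempfile

# ignore_errors=True subsumes the old try/except-pass around rmtree:
# it silently tolerates a missing or partially removed directory.
cache_dir = tempfile.mkdtemp()

def erase(dirname):
    shutil.rmtree(dirname, ignore_errors=True)

erase(cache_dir)                  # removes the directory
erase(cache_dir)                  # second call is a no-op instead of raising
print(os.path.exists(cache_dir))  # False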
@@ -1345,100 +1337,124 @@ class JCache:
     # open(cachename + '.key' + str(i), 'w').write(keys[i])
     # open(cachename + '.value', 'w').write(value)

-  # Given a set of functions of form (ident, text), and a preferred chunk size,
-  # generates a set of chunks for parallel processing and caching.
-  # It is very important to generate similar chunks in incremental builds, in
-  # order to maximize the chance of cache hits. To achieve that, we save the
-  # chunking used in the previous compilation of this phase, and we try to
-  # generate the same chunks, barring big differences in function sizes that
-  # violate our chunk size guideline. If caching is not used, chunking_file
-  # should be None
-  @classmethod
-  def chunkify(self, funcs, chunk_size, chunking_file):
-    previous_mapping = None
-    if chunking_file:
-      chunking_file = self.get_cachename(chunking_file)
-      if os.path.exists(chunking_file):
-        try:
-          previous_mapping = cPickle.Unpickler(open(chunking_file, 'rb')).load() # maps a function identifier to the chunk number it will be in
-          if DEBUG: print >> sys.stderr, 'jscache previous mapping of size %d loaded from %s' % (len(previous_mapping), chunking_file)
-        except Exception, e:
-          print >> sys.stderr, 'Failed to load and unpickle previous chunking file at %s: ' % chunking_file, e
-      else:
-        print >> sys.stderr, 'Previous chunking file not found at %s' % chunking_file
-    chunks = []
-    if previous_mapping:
-      # initialize with previous chunking
-      news = []
-      for func in funcs:
-        ident, data = func
-        assert ident, 'need names for jcache chunking'
-        if not ident in previous_mapping:
-          news.append(func)
-        else:
-          n = previous_mapping[ident]
-          while n >= len(chunks): chunks.append([])
-          chunks[n].append(func)
-      if DEBUG: print >> sys.stderr, 'jscache not in previous chunking', len(news)
-      # add news and adjust for new sizes
-      spilled = news
-      for i in range(len(chunks)):
-        chunk = chunks[i]
-        size = sum([len(func[1]) for func in chunk])
-        #if DEBUG: print >> sys.stderr, 'need spilling?', i, size, len(chunk), 'vs', chunk_size, 1.5*chunk_size
-        while size > 1.5*chunk_size and len(chunk) > 1:
-          spill = chunk.pop()
-          spilled.append(spill)
-          size -= len(spill[1])
-      #if DEBUG: print >> sys.stderr, 'jscache new + spilled', len(spilled)
-      for chunk in chunks:
-        size = sum([len(func[1]) for func in chunk])
-        while size < 0.66*chunk_size and len(spilled) > 0:
-          spill = spilled.pop()
-          chunk.append(spill)
-          size += len(spill[1])
-      chunks = filter(lambda chunk: len(chunk) > 0, chunks) # might have empty ones, eliminate them
-      funcs = spilled # we will allocate these into chunks as if they were normal inputs
-      #if DEBUG: print >> sys.stderr, 'leftover spills', len(spilled)
-    # initialize reasonably, the rest of the funcs we need to split out
-    curr = []
-    total_size = 0
-    for i in range(len(funcs)):
-      func = funcs[i]
-      curr_size = len(func[1])
-      if total_size + curr_size < chunk_size:
-        curr.append(func)
-        total_size += curr_size
-      else:
-        chunks.append(curr)
-        curr = [func]
-        total_size = curr_size
-    if curr:
-      chunks.append(curr)
-      curr = None
-    if chunking_file:
-      # sort within each chunk, to keep the order identical
-      for chunk in chunks:
-        chunk.sort(key=lambda func: func[0])
-      # save new mapping info
-      new_mapping = {}
-      for i in range(len(chunks)):
-        chunk = chunks[i]
-        for ident, data in chunk:
-          assert ident not in new_mapping, 'cannot have duplicate names in jcache chunking'
-          new_mapping[ident] = i
-      cPickle.Pickler(open(chunking_file, 'wb')).dump(new_mapping)
-      if DEBUG: print >> sys.stderr, 'jscache mapping of size %d saved to %s' % (len(new_mapping), chunking_file)
-      #if DEBUG:
-      #  for i in range(len(chunks)):
-      #    chunk = chunks[i]
-      #    print >> sys.stderr, 'final chunk', i, len(chunk)
-      #  print >> sys.stderr, 'new mapping:', new_mapping
-      #  if previous_mapping:
-      #    for ident in set(previous_mapping.keys() + new_mapping.keys()):
-      #      if previous_mapping.get(ident) != new_mapping.get(ident):
-      #        print >> sys.stderr, 'mapping inconsistency', ident, previous_mapping.get(ident), new_mapping.get(ident)
-    return [''.join([func[1] for func in chunk]) for chunk in chunks] # remove function names
+# Given a set of functions of form (ident, text), and a preferred chunk size,
+# generates a set of chunks for parallel processing and caching.
+# It is very important to generate similar chunks in incremental builds, in
+# order to maximize the chance of cache hits. To achieve that, we save the
+# chunking used in the previous compilation of this phase, and we try to
+# generate the same chunks, barring big differences in function sizes that
+# violate our chunk size guideline. If caching is not used, chunking_file
+# should be None
+def chunkify(funcs, chunk_size, chunking_file):
+  previous_mapping = None
+  if chunking_file:
+    if os.path.exists(chunking_file):
+      try:
+        previous_mapping = cPickle.Unpickler(open(chunking_file, 'rb')).load() # maps a function identifier to the chunk number it will be in
+        if DEBUG: print >> sys.stderr, 'jscache previous mapping of size %d loaded from %s' % (len(previous_mapping), chunking_file)
+      except Exception, e:
+        print >> sys.stderr, 'Failed to load and unpickle previous chunking file at %s: ' % chunking_file, e
+    else:
+      print >> sys.stderr, 'Previous chunking file not found at %s' % chunking_file
+  chunks = []
+  if previous_mapping:
+    # initialize with previous chunking
+    news = []
+    for func in funcs:
+      ident, data = func
+      assert ident, 'need names for jcache chunking'
+      if not ident in previous_mapping:
+        news.append(func)
+      else:
+        n = previous_mapping[ident]
+        while n >= len(chunks): chunks.append([])
+        chunks[n].append(func)
+    if DEBUG: print >> sys.stderr, 'jscache not in previous chunking', len(news)
+    # add news and adjust for new sizes
+    spilled = news
+    for i in range(len(chunks)):
+      chunk = chunks[i]
+      size = sum([len(func[1]) for func in chunk])
+      #if DEBUG: print >> sys.stderr, 'need spilling?', i, size, len(chunk), 'vs', chunk_size, 1.5*chunk_size
+      while size > 1.5*chunk_size and len(chunk) > 1:
+        spill = chunk.pop()
+        spilled.append(spill)
+        size -= len(spill[1])
+    #if DEBUG: print >> sys.stderr, 'jscache new + spilled', len(spilled)
+    for chunk in chunks:
+      size = sum([len(func[1]) for func in chunk])
+      while size < 0.66*chunk_size and len(spilled) > 0:
+        spill = spilled.pop()
+        chunk.append(spill)
+        size += len(spill[1])
+    chunks = filter(lambda chunk: len(chunk) > 0, chunks) # might have empty ones, eliminate them
+    funcs = spilled # we will allocate these into chunks as if they were normal inputs
+    #if DEBUG: print >> sys.stderr, 'leftover spills', len(spilled)
+  # initialize reasonably, the rest of the funcs we need to split out
+  curr = []
+  total_size = 0
+  for i in range(len(funcs)):
+    func = funcs[i]
+    curr_size = len(func[1])
+    if total_size + curr_size < chunk_size:
+      curr.append(func)
+      total_size += curr_size
+    else:
+      chunks.append(curr)
+      curr = [func]
+      total_size = curr_size
+  if curr:
+    chunks.append(curr)
+    curr = None
+  if chunking_file:
+    # sort within each chunk, to keep the order identical
+    for chunk in chunks:
+      chunk.sort(key=lambda func: func[0])
+    # save new mapping info
+    new_mapping = {}
+    for i in range(len(chunks)):
+      chunk = chunks[i]
+      for ident, data in chunk:
+        assert ident not in new_mapping, 'cannot have duplicate names in jcache chunking'
+        new_mapping[ident] = i
+    cPickle.Pickler(open(chunking_file, 'wb')).dump(new_mapping)
+    if DEBUG: print >> sys.stderr, 'jscache mapping of size %d saved to %s' % (len(new_mapping), chunking_file)
+    #if DEBUG:
+    #  for i in range(len(chunks)):
+    #    chunk = chunks[i]
+    #    print >> sys.stderr, 'final chunk', i, len(chunk)
+    #  print >> sys.stderr, 'new mapping:', new_mapping
+    #  if previous_mapping:
+    #    for ident in set(previous_mapping.keys() + new_mapping.keys()):
+    #      if previous_mapping.get(ident) != new_mapping.get(ident):
+    #        print >> sys.stderr, 'mapping inconsistency', ident, previous_mapping.get(ident), new_mapping.get(ident)
+  return [''.join([func[1] for func in chunk]) for chunk in chunks] # remove function names

 class JS:
   @staticmethod
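The point of chunkify, whether as a classmethod or as the new free function, is that chunk assignments stay stable across incremental builds so per-chunk cache entries keep hitting. Below is a greatly simplified, self-contained illustration of that idea; it is not the shared.py implementation, and it omits the 1.5x/0.66x spill-and-refill size balancing shown in the diff above.

import os, pickle, tempfile

def stable_chunkify(funcs, chunk_size, chunking_file):
    # remember which chunk each function landed in, and reuse that
    # assignment on the next run so cached per-chunk results stay valid
    previous = {}
    if chunking_file and os.path.exists(chunking_file):
        with open(chunking_file, 'rb') as f:
            previous = pickle.load(f)  # ident -> chunk index from the last run
    chunks = []
    def place(func, n):
        while n >= len(chunks):
            chunks.append([])
        chunks[n].append(func)
    leftovers = []
    for func in funcs:
        ident, text = func
        if ident in previous:
            place(func, previous[ident])  # keep the old assignment
        else:
            leftovers.append(func)
    # pack functions not seen before into fresh chunks of roughly chunk_size
    curr, total = [], 0
    for func in leftovers:
        if total + len(func[1]) >= chunk_size and curr:
            chunks.append(curr)
            curr, total = [], 0
        curr.append(func)
        total += len(func[1])
    if curr:
        chunks.append(curr)
    chunks = [c for c in chunks if c]
    if chunking_file:
        mapping = {ident: i for i, chunk in enumerate(chunks) for ident, _ in chunk}
        with open(chunking_file, 'wb') as f:
            pickle.dump(mapping, f)
    return [''.join(text for _, text in chunk) for chunk in chunks]

chunking_file = os.path.join(tempfile.mkdtemp(), 'chunking.pkl')
funcs = [('a', 'function a(){}'), ('b', 'function b(){}'), ('c', 'function c(){}')]
print(stable_chunkify(funcs, chunk_size=30, chunking_file=chunking_file))
print(stable_chunkify(funcs, chunk_size=30, chunking_file=chunking_file))  # same chunks again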