refactor chunking code and add function ident information

This commit is contained in:
Alon Zakai 2012-11-20 19:57:54 +01:00
Parent db65c00f8e
Commit a2b241e70b
2 changed files: 36 additions and 16 deletions

View file

@ -88,6 +88,7 @@ def emscript(infile, settings, outfile, libraries=[]):
# Split input into the relevant parts for each phase
pre = []
funcs = [] # split up functions here, for parallelism later
func_idents = []
meta = [] # needed by each function XXX
if DEBUG: t = time.time()
@ -95,16 +96,17 @@ def emscript(infile, settings, outfile, libraries=[]):
ll_lines = open(infile).readlines()
for line in ll_lines:
if in_func:
funcs[-1].append(line)
funcs[-1][1].append(line)
if line.startswith('}'):
in_func = False
funcs[-1] = ''.join(funcs[-1])
funcs[-1] = (funcs[-1][0], ''.join(funcs[-1][1]))
pre.append(line) # pre needs it to, so we know about all implemented functions
else:
if line.startswith(';'): continue
if line.startswith('define '):
in_func = True
funcs.append([line])
ident = shared.JS.to_nice_ident(line.split('(')[0].split(' ')[-1])
funcs.append((ident, [line]))
pre.append(line) # pre needs it to, so we know about all implemented functions
elif line.find(' = type { ') > 0:
pre.append(line) # type
@ -168,22 +170,12 @@ def emscript(infile, settings, outfile, libraries=[]):
if DEBUG: t = time.time()
forwarded_json = json.loads(forwarded_data)
indexed_functions = set()
chunks = [] # bundles of functions
curr = []
for i in range(len(funcs)):
func = funcs[i]
if len(curr) + len(func) < chunk_size:
curr.append(func)
else:
chunks.append(curr)
curr = [func]
if curr:
chunks.append(curr)
curr = None
chunks = shared.JCache.chunkify(funcs, chunk_size)
if cores == 1 and total_ll_size < MAX_CHUNK_SIZE: assert len(chunks) == 1, 'no point in splitting up without multiple cores'
if DEBUG: print >> sys.stderr, ' emscript: phase 2 working on %d chunks %s (intended chunk size: %.2f MB, meta: %.2f MB, forwarded: %.2f MB, total: %.2f MB)' % (len(chunks), ('using %d cores' % cores) if len(chunks) > 1 else '', chunk_size/(1024*1024.), len(meta)/(1024*1024.), len(forwarded_data)/(1024*1024.), total_ll_size/(1024*1024.))
commands = [(i, chunks[i], meta, settings_file, compiler, forwarded_file, libraries) for i in range(len(chunks))]
commands = [(i, [func[1] for func in chunks[i]], meta, settings_file, compiler, forwarded_file, libraries) for i in range(len(chunks))]
if len(chunks) > 1:
pool = multiprocessing.Pool(processes=cores)

View file

@ -1153,6 +1153,9 @@ class Cache:
shutil.copyfile(creator(), cachename)
return cachename
# JS-specific cache. We cache the results of compilation and optimization,
# so that in incremental builds we can just load from cache.
# We cache reasonably-large-sized chunks
class JCache:
dirname = os.path.join(Cache.dirname, 'jcache')
@ -1202,6 +1205,31 @@ class JCache:
cachename = JCache.get_cachename(shortkey)
cPickle.Pickler(open(cachename, 'wb')).dump([keys, value])
# Given a set of functions of form (ident, text), and a preferred chunk size,
# generates a set of chunks for parallel processing and caching.
# It is very important to generate similar chunks in incremental builds, in
# order to maximize the chance of cache hits.
@staticmethod
def chunkify(funcs, chunk_size):
  # funcs: list of (ident, text) tuples; chunk_size: preferred chunk size.
  # Returns a list of chunks, each a list of (ident, text) tuples.
  # NOTE: the size test mixes the *count* of functions already in the chunk
  # (len(curr)) with the *text length* of the candidate (len(func[1])); this
  # preserves the pre-refactor behavior — TODO confirm whether summed text
  # sizes were intended.
  chunks = [] # bundles of functions
  curr = []
  for func in funcs:
    if len(curr) + len(func[1]) < chunk_size:
      curr.append(func)
    else:
      # Bug fix: if the very first function alone exceeds chunk_size,
      # curr is still empty here — do not emit an empty chunk (it would
      # become a useless parallel compile job downstream).
      if curr:
        chunks.append(curr)
      curr = [func]
  if curr:
    chunks.append(curr)
  return chunks
class JS:
  @staticmethod
  def to_nice_ident(ident): # limited version of the JS function toNiceIdent
    # Sanitize an LLVM identifier for use as a JS name by mapping the
    # characters JS cannot accept: '%' becomes '$' and '@' becomes '_'.
    result = ident
    for bad, good in (('%', '$'), ('@', '_')):
      result = result.replace(bad, good)
    return result
# Compression of code and data for smaller downloads
class Compression:
on = False