Make chunkify a free function
Parent: 8e63a929d4
Commit: a804dfb628
@@ -189,7 +189,9 @@ def emscript(configuration, infile, settings, outfile, libraries=[],
     settings['EXPORTED_FUNCTIONS'] = forwarded_json['EXPORTED_FUNCTIONS']
     save_settings()

-  chunks = shared.JCache.chunkify(funcs, chunk_size, 'emscript_files' if jcache else None)
+  chunks = shared.chunkify(
+      funcs, chunk_size,
+      jcache.get_cachename('emscript_files') if jcache else None)

   if jcache:
     # load chunks from cache where we can # TODO: ignore small chunks
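The call-site change above is the visible half of the refactor: chunkify no longer resolves a short label into a cache path itself, so the caller passes either a fully resolved filename (obtained from the cache object's get_cachename) or None. A minimal, self-contained sketch of that contract, using a hypothetical FakeCache and plan_chunking as stand-ins rather than emscripten's real shared module:

import os

# Hypothetical stand-in for the jcache object; only get_cachename() matters here.
class FakeCache:
    def __init__(self, dirname):
        self.dirname = dirname
    def get_cachename(self, shortname):
        # resolve a short label like 'emscript_files' to a file inside the cache dir
        return os.path.join(self.dirname, shortname)

def plan_chunking(funcs, chunk_size, chunking_file):
    # stand-in for shared.chunkify: it receives a ready path (or None)
    # and no longer needs to know anything about the cache layout
    return 'would chunk %d funcs, state kept in %r' % (len(funcs), chunking_file)

jcache = FakeCache('/tmp/emscripten_cache')  # or None when jcache is disabled
funcs = [('f1', 'function f1(){}'), ('f2', 'function f2(){}')]
print(plan_chunking(funcs, 1024,
                    jcache.get_cachename('emscript_files') if jcache else None))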
@@ -106,7 +106,7 @@ def run_on_js(filename, passes, js_engine, jcache):
   intended_num_chunks = int(round(cores * NUM_CHUNKS_PER_CORE))
   chunk_size = min(MAX_CHUNK_SIZE, max(MIN_CHUNK_SIZE, total_size / intended_num_chunks))

-  chunks = shared.JCache.chunkify(funcs, chunk_size, 'jsopt' if jcache else None)
+  chunks = shared.chunkify(funcs, chunk_size, jcache.get_cachename('jsopt') if jcache else None)

   if jcache:
     # load chunks from cache where we can # TODO: ignore small chunks
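For context on the chunk_size being passed in: the total payload is split into roughly NUM_CHUNKS_PER_CORE chunks per core, then clamped to a minimum and maximum chunk size. A small sketch of that heuristic; the constants below are illustrative, not emscripten's actual values:

NUM_CHUNKS_PER_CORE = 2
MIN_CHUNK_SIZE = 512 * 1024
MAX_CHUNK_SIZE = 4 * 1024 * 1024

def pick_chunk_size(total_size, cores):
    intended_num_chunks = int(round(cores * NUM_CHUNKS_PER_CORE))
    # clamp: never below MIN_CHUNK_SIZE, never above MAX_CHUNK_SIZE
    return min(MAX_CHUNK_SIZE, max(MIN_CHUNK_SIZE, total_size // intended_num_chunks))

print(pick_chunk_size(total_size=10 * 1024 * 1024, cores=4))  # 1310720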
tools/shared.py (220 changed lines)
@@ -1249,15 +1249,7 @@ class Cache:

   @classmethod
   def erase(self):
-    try:
-      shutil.rmtree(self.dirname)
-    except:
-      pass
-    try_delete(RELOOPER)
-    try:
-      open(Cache.dirname + '__last_clear', 'w').write('last clear: ' + time.asctime() + '\n')
-    except:
-      print >> sys.stderr, 'failed to save last clear time'
+    shutil.rmtree(self.dirname, ignore_errors=True)

   @classmethod
   def get_path(self, shortname):
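The erase change above collapses the try/except around rmtree into one call with ignore_errors=True (and drops the RELOOPER cleanup and the __last_clear marker write). A minimal sketch of why ignore_errors=True is enough for the rmtree part:

import os, shutil, tempfile

# ignore_errors=True subsumes the old try/except-pass around rmtree:
# it silently tolerates a missing or partially removed directory.
cache_dir = tempfile.mkdtemp()

def erase(dirname):
    shutil.rmtree(dirname, ignore_errors=True)

erase(cache_dir)                  # removes the directory
erase(cache_dir)                  # second call is a no-op instead of raising
print(os.path.exists(cache_dir))  # False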
@@ -1345,100 +1337,124 @@ class JCache:
     # open(cachename + '.key' + str(i), 'w').write(keys[i])
     # open(cachename + '.value', 'w').write(value)

-  # Given a set of functions of form (ident, text), and a preferred chunk size,
-  # generates a set of chunks for parallel processing and caching.
-  # It is very important to generate similar chunks in incremental builds, in
-  # order to maximize the chance of cache hits. To achieve that, we save the
-  # chunking used in the previous compilation of this phase, and we try to
-  # generate the same chunks, barring big differences in function sizes that
-  # violate our chunk size guideline. If caching is not used, chunking_file
-  # should be None
-  @classmethod
-  def chunkify(self, funcs, chunk_size, chunking_file):
-    previous_mapping = None
-    if chunking_file:
-      chunking_file = self.get_cachename(chunking_file)
-      if os.path.exists(chunking_file):
-        try:
-          previous_mapping = cPickle.Unpickler(open(chunking_file, 'rb')).load() # maps a function identifier to the chunk number it will be in
-          if DEBUG: print >> sys.stderr, 'jscache previous mapping of size %d loaded from %s' % (len(previous_mapping), chunking_file)
-        except Exception, e:
-          print >> sys.stderr, 'Failed to load and unpickle previous chunking file at %s: ' % chunking_file, e
-      else:
-        print >> sys.stderr, 'Previous chunking file not found at %s' % chunking_file
-    chunks = []
-    if previous_mapping:
-      # initialize with previous chunking
-      news = []
-      for func in funcs:
-        ident, data = func
-        assert ident, 'need names for jcache chunking'
-        if not ident in previous_mapping:
-          news.append(func)
-        else:
-          n = previous_mapping[ident]
-          while n >= len(chunks): chunks.append([])
-          chunks[n].append(func)
-      if DEBUG: print >> sys.stderr, 'jscache not in previous chunking', len(news)
-      # add news and adjust for new sizes
-      spilled = news
-      for i in range(len(chunks)):
-        chunk = chunks[i]
-        size = sum([len(func[1]) for func in chunk])
-        #if DEBUG: print >> sys.stderr, 'need spilling?', i, size, len(chunk), 'vs', chunk_size, 1.5*chunk_size
-        while size > 1.5*chunk_size and len(chunk) > 1:
-          spill = chunk.pop()
-          spilled.append(spill)
-          size -= len(spill[1])
-      #if DEBUG: print >> sys.stderr, 'jscache new + spilled', len(spilled)
-      for chunk in chunks:
-        size = sum([len(func[1]) for func in chunk])
-        while size < 0.66*chunk_size and len(spilled) > 0:
-          spill = spilled.pop()
-          chunk.append(spill)
-          size += len(spill[1])
-      chunks = filter(lambda chunk: len(chunk) > 0, chunks) # might have empty ones, eliminate them
-      funcs = spilled # we will allocate these into chunks as if they were normal inputs
-      #if DEBUG: print >> sys.stderr, 'leftover spills', len(spilled)
-    # initialize reasonably, the rest of the funcs we need to split out
-    curr = []
-    total_size = 0
-    for i in range(len(funcs)):
-      func = funcs[i]
-      curr_size = len(func[1])
-      if total_size + curr_size < chunk_size:
-        curr.append(func)
-        total_size += curr_size
-      else:
-        chunks.append(curr)
-        curr = [func]
-        total_size = curr_size
-    if curr:
-      chunks.append(curr)
-      curr = None
-    if chunking_file:
-      # sort within each chunk, to keep the order identical
-      for chunk in chunks:
-        chunk.sort(key=lambda func: func[0])
-      # save new mapping info
-      new_mapping = {}
-      for i in range(len(chunks)):
-        chunk = chunks[i]
-        for ident, data in chunk:
-          assert ident not in new_mapping, 'cannot have duplicate names in jcache chunking'
-          new_mapping[ident] = i
-      cPickle.Pickler(open(chunking_file, 'wb')).dump(new_mapping)
-      if DEBUG: print >> sys.stderr, 'jscache mapping of size %d saved to %s' % (len(new_mapping), chunking_file)
-      #if DEBUG:
-      #  for i in range(len(chunks)):
-      #    chunk = chunks[i]
-      #    print >> sys.stderr, 'final chunk', i, len(chunk)
-      #  print >> sys.stderr, 'new mapping:', new_mapping
-      #  if previous_mapping:
-      #    for ident in set(previous_mapping.keys() + new_mapping.keys()):
-      #      if previous_mapping.get(ident) != new_mapping.get(ident):
-      #        print >> sys.stderr, 'mapping inconsistency', ident, previous_mapping.get(ident), new_mapping.get(ident)
-    return [''.join([func[1] for func in chunk]) for chunk in chunks] # remove function names
+# Given a set of functions of form (ident, text), and a preferred chunk size,
+# generates a set of chunks for parallel processing and caching.
+# It is very important to generate similar chunks in incremental builds, in
+# order to maximize the chance of cache hits. To achieve that, we save the
+# chunking used in the previous compilation of this phase, and we try to
+# generate the same chunks, barring big differences in function sizes that
+# violate our chunk size guideline. If caching is not used, chunking_file
+# should be None
+def chunkify(funcs, chunk_size, chunking_file):
+  previous_mapping = None
+  if chunking_file:
+    if os.path.exists(chunking_file):
+      try:
+        previous_mapping = cPickle.Unpickler(open(chunking_file, 'rb')).load() # maps a function identifier to the chunk number it will be in
+        if DEBUG: print >> sys.stderr, 'jscache previous mapping of size %d loaded from %s' % (len(previous_mapping), chunking_file)
+      except Exception, e:
+        print >> sys.stderr, 'Failed to load and unpickle previous chunking file at %s: ' % chunking_file, e
+    else:
+      print >> sys.stderr, 'Previous chunking file not found at %s' % chunking_file
+  chunks = []
+  if previous_mapping:
+    # initialize with previous chunking
+    news = []
+    for func in funcs:
+      ident, data = func
+      assert ident, 'need names for jcache chunking'
+      if not ident in previous_mapping:
+        news.append(func)
+      else:
+        n = previous_mapping[ident]
+        while n >= len(chunks): chunks.append([])
+        chunks[n].append(func)
+    if DEBUG: print >> sys.stderr, 'jscache not in previous chunking', len(news)
+    # add news and adjust for new sizes
+    spilled = news
+    for i in range(len(chunks)):
+      chunk = chunks[i]
+      size = sum([len(func[1]) for func in chunk])
+      #if DEBUG: print >> sys.stderr, 'need spilling?', i, size, len(chunk), 'vs', chunk_size, 1.5*chunk_size
+      while size > 1.5*chunk_size and len(chunk) > 1:
+        spill = chunk.pop()
+        spilled.append(spill)
+        size -= len(spill[1])
+    #if DEBUG: print >> sys.stderr, 'jscache new + spilled', len(spilled)
+    for chunk in chunks:
+      size = sum([len(func[1]) for func in chunk])
+      while size < 0.66*chunk_size and len(spilled) > 0:
+        spill = spilled.pop()
+        chunk.append(spill)
+        size += len(spill[1])
+    chunks = filter(lambda chunk: len(chunk) > 0, chunks) # might have empty ones, eliminate them
+    funcs = spilled # we will allocate these into chunks as if they were normal inputs
+    #if DEBUG: print >> sys.stderr, 'leftover spills', len(spilled)
+  # initialize reasonably, the rest of the funcs we need to split out
+  curr = []
+  total_size = 0
+  for i in range(len(funcs)):
+    func = funcs[i]
+    curr_size = len(func[1])
+    if total_size + curr_size < chunk_size:
+      curr.append(func)
+      total_size += curr_size
+    else:
+      chunks.append(curr)
+      curr = [func]
+      total_size = curr_size
+  if curr:
+    chunks.append(curr)
+    curr = None
+  if chunking_file:
+    # sort within each chunk, to keep the order identical
+    for chunk in chunks:
+      chunk.sort(key=lambda func: func[0])
+    # save new mapping info
+    new_mapping = {}
+    for i in range(len(chunks)):
+      chunk = chunks[i]
+      for ident, data in chunk:
+        assert ident not in new_mapping, 'cannot have duplicate names in jcache chunking'
+        new_mapping[ident] = i
+    cPickle.Pickler(open(chunking_file, 'wb')).dump(new_mapping)
+    if DEBUG: print >> sys.stderr, 'jscache mapping of size %d saved to %s' % (len(new_mapping), chunking_file)
+    #if DEBUG:
+    #  for i in range(len(chunks)):
+    #    chunk = chunks[i]
+    #    print >> sys.stderr, 'final chunk', i, len(chunk)
+    #  print >> sys.stderr, 'new mapping:', new_mapping
+    #  if previous_mapping:
+    #    for ident in set(previous_mapping.keys() + new_mapping.keys()):
+    #      if previous_mapping.get(ident) != new_mapping.get(ident):
+    #        print >> sys.stderr, 'mapping inconsistency', ident, previous_mapping.get(ident), new_mapping.get(ident)
+  return [''.join([func[1] for func in chunk]) for chunk in chunks] # remove function names

 class JS:
   @staticmethod
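The point of chunkify, whether as a classmethod or as the new free function, is that chunk assignments stay stable across incremental builds so per-chunk cache entries keep hitting. Below is a greatly simplified, self-contained illustration of that idea; it is not the shared.py implementation, and it omits the 1.5x/0.66x spill-and-refill size balancing shown in the diff above.

import os, pickle, tempfile

def stable_chunkify(funcs, chunk_size, chunking_file):
    # remember which chunk each function landed in, and reuse that
    # assignment on the next run so cached per-chunk results stay valid
    previous = {}
    if chunking_file and os.path.exists(chunking_file):
        with open(chunking_file, 'rb') as f:
            previous = pickle.load(f)  # ident -> chunk index from the last run
    chunks = []
    def place(func, n):
        while n >= len(chunks):
            chunks.append([])
        chunks[n].append(func)
    leftovers = []
    for func in funcs:
        ident, text = func
        if ident in previous:
            place(func, previous[ident])  # keep the old assignment
        else:
            leftovers.append(func)
    # pack functions not seen before into fresh chunks of roughly chunk_size
    curr, total = [], 0
    for func in leftovers:
        if total + len(func[1]) >= chunk_size and curr:
            chunks.append(curr)
            curr, total = [], 0
        curr.append(func)
        total += len(func[1])
    if curr:
        chunks.append(curr)
    chunks = [c for c in chunks if c]
    if chunking_file:
        mapping = {ident: i for i, chunk in enumerate(chunks) for ident, _ in chunk}
        with open(chunking_file, 'wb') as f:
            pickle.dump(mapping, f)
    return [''.join(text for _, text in chunk) for chunk in chunks]

chunking_file = os.path.join(tempfile.mkdtemp(), 'chunking.pkl')
funcs = [('a', 'function a(){}'), ('b', 'function b(){}'), ('c', 'function c(){}')]
print(stable_chunkify(funcs, chunk_size=30, chunking_file=chunking_file))
print(stable_chunkify(funcs, chunk_size=30, chunking_file=chunking_file))  # same chunks again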