From 20cc1f17363a97170e444cf6d51cd20a51848a68 Mon Sep 17 00:00:00 2001 From: juj Date: Mon, 12 Nov 2018 17:31:58 +0200 Subject: [PATCH] Minify asm.js module import names on -O1 and higher, but not when -g2 or higher is in effect. (#7452) --- emcc.py | 4 +++ emscripten.py | 26 ++++++++++++--- src/settings.js | 5 +++ tests/gen_many_js_functions.py | 43 +++++++++++++++++++++++++ tests/test_other.py | 27 ++++++++++++++++ tools/minified_js_name_generator.py | 49 +++++++++++++++++++++++++++++ 6 files changed, 150 insertions(+), 4 deletions(-) create mode 100644 tests/gen_many_js_functions.py create mode 100644 tools/minified_js_name_generator.py diff --git a/emcc.py b/emcc.py index 4899eec94..52a214af5 100755 --- a/emcc.py +++ b/emcc.py @@ -1255,6 +1255,10 @@ There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR P options.js_opts = True options.force_js_opts = True + # Enable minification of asm.js imports on -O1 and higher if -g1 or lower is used. + if options.opt_level >= 1 and options.debug_level < 2 and not shared.Settings.WASM: + shared.Settings.MINIFY_ASMJS_IMPORT_NAMES = 1 + if shared.Settings.WASM: # When only targeting wasm, the .asm.js file is not executable, so is treated as an intermediate build file that can be cleaned up. 
if shared.Building.is_wasm_only(): diff --git a/emscripten.py b/emscripten.py index aaf1dd32f..cfb08fb0b 100644 --- a/emscripten.py +++ b/emscripten.py @@ -29,6 +29,7 @@ from tools import jsrun, tempfiles from tools.response_file import substitute_response_files from tools.shared import WINDOWS, asstr, path_from_root, exit_with_error from tools.toolchain_profiler import ToolchainProfiler +from tools.minified_js_name_generator import MinifiedJsNameGenerator if __name__ == '__main__': ToolchainProfiler.record_process_start() @@ -305,14 +306,31 @@ def function_tables_and_exports(funcs, metadata, mem_init, glue, forwarded_data, if shared.Settings.RELOCATABLE: global_funcs += ['g$' + extern for extern in metadata['externs']] + # Tracks the set of used (minified) function names in + # JS symbols imported to asm.js module. + minified_js_names = MinifiedJsNameGenerator() + + # Converts list of imports ['foo', 'bar', ...] to a dictionary of + # name mappings in form { 'minified': 'unminified', ... 
} + def define_asmjs_import_names(imports): + if shared.Settings.MINIFY_ASMJS_IMPORT_NAMES: + return [(minified_js_names.generate(), i) for i in imports] + else: + return [(i, i) for i in imports] + + basic_funcs = define_asmjs_import_names(basic_funcs) + global_funcs = define_asmjs_import_names(global_funcs) + basic_vars = define_asmjs_import_names(basic_vars) + global_vars = define_asmjs_import_names(global_vars) + bg_funcs = basic_funcs + global_funcs bg_vars = basic_vars + global_vars asm_global_funcs = create_asm_global_funcs(bg_funcs, metadata) asm_global_vars = create_asm_global_vars(bg_vars) the_global = create_the_global(metadata) - sending_vars = basic_funcs + global_funcs + basic_vars + global_vars - sending = '{ ' + ', '.join(['"' + math_fix(s) + '": ' + s for s in sending_vars]) + ' }' + sending_vars = bg_funcs + bg_vars + sending = '{ ' + ', '.join(['"' + math_fix(minified) + '": ' + unminified for (minified, unminified) in sending_vars]) + ' }' receiving = create_receiving(function_table_data, function_tables_defs, exported_implemented_functions) @@ -1078,7 +1096,7 @@ def create_asm_global_funcs(bg_funcs, metadata): maths += ['Math.fround'] asm_global_funcs = ''.join([' var ' + g.replace('.', '_') + '=global' + access_quote(g) + ';\n' for g in maths]) - asm_global_funcs += ''.join([' var ' + g + '=env' + access_quote(math_fix(g)) + ';\n' for g in bg_funcs]) + asm_global_funcs += ''.join([' var ' + unminified + '=env' + access_quote(math_fix(minified)) + ';\n' for (minified, unminified) in bg_funcs]) asm_global_funcs += global_simd_funcs(access_quote, metadata) if shared.Settings.USE_PTHREADS: asm_global_funcs += ''.join([' var Atomics_' + ty + '=global' + access_quote('Atomics') + access_quote(ty) + ';\n' for ty in ['load', 'store', 'exchange', 'compareExchange', 'add', 'sub', 'and', 'or', 'xor']]) @@ -1086,7 +1104,7 @@ def create_asm_global_funcs(bg_funcs, metadata): def create_asm_global_vars(bg_vars): - asm_global_vars = ''.join([' var ' + g + 
'=env' + access_quote(g) + '|0;\n' for g in bg_vars]) + asm_global_vars = ''.join([' var ' + unminified + '=env' + access_quote(minified) + '|0;\n' for (minified, unminified) in bg_vars]) if shared.Settings.WASM and shared.Settings.SIDE_MODULE: # wasm side modules internally define their stack, these are set at module startup time asm_global_vars += '\n var STACKTOP = 0, STACK_MAX = 0;\n' diff --git a/src/settings.js b/src/settings.js index 71f89e913..aedb728f5 100644 --- a/src/settings.js +++ b/src/settings.js @@ -1271,3 +1271,8 @@ var ENVIRONMENT_MAY_BE_WORKER = 1; var ENVIRONMENT_MAY_BE_NODE = 1; var ENVIRONMENT_MAY_BE_SHELL = 1; var ENVIRONMENT_MAY_BE_WEB_OR_WORKER = 1; + +// Internal: passes information to emscripten.py about whether to minify +// JS -> asm.js import names. Controlled by optimization level, enabled +// at -O1 and higher, but disabled at -g2 and higher. +var MINIFY_ASMJS_IMPORT_NAMES = 0; diff --git a/tests/gen_many_js_functions.py b/tests/gen_many_js_functions.py new file mode 100644 index 000000000..73b90f29d --- /dev/null +++ b/tests/gen_many_js_functions.py @@ -0,0 +1,43 @@ +#!/usr/bin/env python + +# Copyright 2018 The Emscripten Authors. All rights reserved. +# Emscripten is available under two separate licenses, the MIT license and the +# University of Illinois/NCSA Open Source License. Both these licenses can be +# found in the LICENSE file. 
+ +import sys + +NUM_FUNCS_TO_GENERATE = 1000 + +def func_name(i): + return 'thisIsAFunctionWithVeryLongFunctionNameThatWouldBeGreatToBeMinifiedWhenImportingToAsmJsOrWasmSideCodeToCallOtherwiseCodeSizesWillBeLargeAndNetworkTransfersBecomeVerySlowThatUsersWillGoAwayAndVisitSomeOtherSiteInsteadAndThenWebAssemblyDeveloperIsSadOrEvenWorseNobodyNoticesButInternetPipesWillGetMoreCongestedWhichContributesToGlobalWarmingAndThenEveryoneElseWillBeSadAsWellEspeciallyThePolarBearsAndPenguinsJustThinkAboutThePenguins' + str(i+1) + +def generate_js_library_with_lots_of_functions(out_file): + with open(out_file, 'w') as f: + f.write('var FunctionsLibrary = {\n') + + for i in range(NUM_FUNCS_TO_GENERATE): + f.write(' ' + func_name(i) + ': function() { return ' + str(i+1) + '; },\n') + + f.write('}\n'); + f.write('mergeInto(LibraryManager.library, FunctionsLibrary);\n'); + +def generate_c_program_that_calls_js_library_with_lots_of_functions(out_file): + with open(out_file, 'w') as f: + f.write('#include <stdio.h>\n\n') + + for i in range(NUM_FUNCS_TO_GENERATE): + f.write('int ' + func_name(i) + '(void);\n') + + f.write('\nint main() {\n') + f.write(' int sum = 0;\n') + + for i in range(NUM_FUNCS_TO_GENERATE): + f.write(' sum += ' + func_name(i) + '();\n') + + f.write('\n printf("Sum of numbers from 1 to ' + str(NUM_FUNCS_TO_GENERATE) + ': %d (expected ' + str(int((NUM_FUNCS_TO_GENERATE * (NUM_FUNCS_TO_GENERATE+1))/2)) + ')\\n", sum);\n'); + f.write('}\n'); + +if __name__ == '__main__': + generate_js_library_with_lots_of_functions(sys.argv[1]) + generate_c_program_that_calls_js_library_with_lots_of_functions(sys.argv[2]) diff --git a/tests/test_other.py b/tests/test_other.py index 72f7921ef..041944469 100644 --- a/tests/test_other.py +++ b/tests/test_other.py @@ -8741,3 +8741,30 @@ int main () { rval = send (0, thing, 0, 0); return 0; }''', '', force_c=True) + + # This test verifies that function names embedded into the build with --js-library (JS functions imported to asm.js/wasm) + # are 
minified when -O3 is used + def test_js_function_names_are_minified(self): + def check_size(f, expected_size): + if not os.path.isfile(f): + return # Nonexistent file passes in this check + obtained_size = os.path.getsize(f) + print('size of generated ' + f + ': ' + str(obtained_size)) + try_delete(f) + assert obtained_size < expected_size + + run_process([PYTHON, path_from_root('tests', 'gen_many_js_functions.py'), 'library_long.js', 'main_long.c']) + # TODO: Add support to Wasm to minify imports, and then add Wasm testing ['-s', 'WASM=1'] to this list + for wasm in [['-s', 'WASM=1'], ['-s', 'WASM=0']]: + # Currently we rely on Closure for full minification of every appearance of JS function names. + # TODO: Add minification also for non-Closure users and add [] to this list to test minification without Closure. + for closure in [['--closure', '1']]: + args = [PYTHON, EMCC, '-O3', '--js-library', 'library_long.js', 'main_long.c', '-o', 'a.html'] + wasm + closure + print(' '.join(args)) + run_process(args) + + ret = run_process(NODE_JS + ['a.js'], stdout=PIPE).stdout + self.assertTextDataIdentical('Sum of numbers from 1 to 1000: 500500 (expected 500500)', ret.strip()) + + check_size('a.js', 150000) + check_size('a.wasm', 80000) diff --git a/tools/minified_js_name_generator.py b/tools/minified_js_name_generator.py new file mode 100644 index 000000000..ec3a989ab --- /dev/null +++ b/tools/minified_js_name_generator.py @@ -0,0 +1,49 @@ +# Copyright 2018 The Emscripten Authors. All rights reserved. +# Emscripten is available under two separate licenses, the MIT license and the +# University of Illinois/NCSA Open Source License. Both these licenses can be +# found in the LICENSE file. 
# This class can be used to produce a set of minified names to be used as JS
# variables
class MinifiedJsNameGenerator(object):
  """Produces a sequence of short, unique names usable as JS identifiers.

  Each call to generate() returns the next name in the sequence. All
  one-character names are handed out first, then two-character names, and so
  on. Names listed in reserved_names are skipped.

  Every instance keeps its own counter, so two generators created
  independently both start again from the first name.
  """

  # Names that must never be handed out: JavaScript reserved words of up to
  # four characters, plus 'env' (the generated asm.js code reads its imports
  # through a variable of that name).
  reserved_names = ['do', 'if', 'in', 'for', 'new', 'try', 'var', 'env', 'let', 'case', 'else', 'enum', 'void', 'this', 'with', 'null', 'true']
  valid_first_chars = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_$"
  valid_later_chars = valid_first_chars + '0123456789'

  # Class-level default; each instance tracks its own flag once it has warned.
  overflow_warned = False

  def __init__(self):
    # Mixed-radix counter: name_iterator[pos] is the index of the character
    # used at position pos of the name. Position 0 indexes valid_first_chars,
    # all other positions index valid_later_chars. This is per-instance state;
    # a class-level list would be shared by every generator.
    self.name_iterator = []
    self.overflow_warned = False

  def max_length(self, pos):
    """Return how many different characters are allowed at position pos."""
    return len(self.valid_first_chars) if pos == 0 else len(self.valid_later_chars)

  def produce_name(self):
    """Return the name that the counter currently points at.

    Raises IndexError if no name has been generated yet (empty counter).
    """
    # Position 0 must come first in the string: it is the only position that
    # is restricted to characters a JS identifier may start with.
    first = self.valid_first_chars[self.name_iterator[0]]
    rest = ''.join(self.valid_later_chars[idx] for idx in self.name_iterator[1:])
    return first + rest

  def _advance(self):
    """Step the counter to the next name, growing it by one position on wrap."""
    for pos in range(len(self.name_iterator)):
      self.name_iterator[pos] += 1
      if self.name_iterator[pos] < self.max_length(pos):
        return
      # This position wrapped around: reset it and carry into the next one.
      self.name_iterator[pos] = 0

    # Either this is the very first name, or every position wrapped around:
    # move on to names that are one character longer.
    self.name_iterator.append(0)
    if len(self.name_iterator) >= 5 and not self.overflow_warned:
      # Imported here because this module has no other use for logging.
      import logging
      logging.warning('MinifiedJsNameGenerator has only been defined for symbols up to 4 characters! TODO: Add JavaScript reserved names of length 5 and more to this list')
      self.overflow_warned = True

  def generate(self):
    """Return the next unused name, skipping everything in reserved_names."""
    while True:
      self._advance()
      name = self.produce_name()
      if name not in self.reserved_names:
        return name