From 77aa2a72f38d73e71eef48cacfe72cce33fc7201 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Sat, 3 May 2014 12:09:37 -0700 Subject: [PATCH] emit a global const for Math_fround(0) to avoid function call overheads in the fround polyfill --- emcc | 2 +- emscripten.py | 2 +- src/settings.js | 9 ++- tools/js-optimizer.js | 55 ++++++++++--------- tools/test-js-optimizer-asm-pre-f32.js | 8 ++- tools/test-js-optimizer-asm-pre-output-f32.js | 8 ++- 6 files changed, 54 insertions(+), 30 deletions(-) diff --git a/emcc b/emcc index b9d747137..1629f5c74 100755 --- a/emcc +++ b/emcc @@ -1727,7 +1727,7 @@ try: # with commaified code breaks late aggressive variable elimination) if shared.Settings.SIMPLIFY_IFS and (debug_level == 0 or profiling) and shared.Settings.OUTLINING_LIMIT == 0: js_optimizer_queue += ['simplifyIfs'] - if opt_level >= 3 and shared.Settings.PRECISE_F32: js_optimizer_queue += ['optimizeFrounds'] + if shared.Settings.PRECISE_F32: js_optimizer_queue += ['optimizeFrounds'] if closure and not shared.Settings.ASM_JS: flush_js_optimizer_queue() diff --git a/emscripten.py b/emscripten.py index bf55ee43f..16f9eb330 100755 --- a/emscripten.py +++ b/emscripten.py @@ -1165,7 +1165,7 @@ var asm = (function(global, env, buffer) { var nan = +env.NaN, inf = +env.Infinity; var tempInt = 0, tempBigInt = 0, tempBigIntP = 0, tempBigIntS = 0, tempBigIntR = 0.0, tempBigIntI = 0, tempBigIntD = 0, tempValue = 0, tempDouble = 0.0; ''' + ''.join([''' - var tempRet%d = 0;''' % i for i in range(10)]) + '\n' + asm_global_funcs] + [' var tempFloat = %s;\n' % ('Math_fround(0)' if settings.get('PRECISE_F32') else '0.0')] + [''' + var tempRet%d = 0;''' % i for i in range(10)]) + '\n' + asm_global_funcs] + [' var tempFloat = %s;\n' % ('Math_fround(0)' if settings.get('PRECISE_F32') else '0.0')] + ([' const f0 = Math_fround(0);\n'] if settings.get('PRECISE_F32') else []) + [''' // EMSCRIPTEN_START_FUNCS function stackAlloc(size) { size = size|0; diff --git a/src/settings.js b/src/settings.js index a9a724253..3289eace6 100644 --- a/src/settings.js +++ b/src/settings.js @@ -124,13 +124,20 @@ var PRECISE_I32_MUL = 1; // If enabled, i32 multiplication is done with full pre var PRECISE_F32 = 0; // 0: Use JS numbers for floating-point values. These are 64-bit and do not model C++ // floats exactly, which are 32-bit. // 1: Model C++ floats precisely, using Math.fround, polyfilling when necessary. This - // can be slow if the polyfill is used on heavy float32 computation. + // can be slow if the polyfill is used on heavy float32 computation. See note on + // browser support below. // 2: Model C++ floats precisely using Math.fround if available in the JS engine, otherwise // use an empty polyfill. This will have much less of a speed penalty than using the full // polyfill in cases where engine support is not present. In addition, we can // remove the empty polyfill calls themselves on the client when generating html, // which should mean that this gives you the best of both worlds of 0 and 1, and is // therefore recommended. + // XXX Note: To optimize float32-using code, we use the 'const' keyword in the emitted + // code. This allows us to avoid unnecessary calls to Math.fround, which would + // slow down engines not yet supporting that function. 'const' is present in + // all modern browsers, including Firefox, Chrome and Safari, but in IE is only + // present in IE11 and above. Therefore if you need to support legacy versions of + // IE, you should not enable PRECISE_F32 1 or 2. var SIMD = 0; // Whether to emit SIMD code ( https://github.com/johnmccutchan/ecmascript_simd ) var CLOSURE_COMPILER = 0; // Whether closure compiling is being run on this output diff --git a/tools/js-optimizer.js b/tools/js-optimizer.js index db85965dc..2914b6e84 100644 --- a/tools/js-optimizer.js +++ b/tools/js-optimizer.js @@ -1342,13 +1342,21 @@ var ASM_DOUBLE = 1; var ASM_FLOAT = 2; var ASM_NONE = 3; -function detectAsmCoercion(node, asmInfo) { +var ASM_FLOAT_ZERO = null; // TODO: share the entire node? + +function detectAsmCoercion(node, asmInfo, inVarDef) { // for params, +x vs x|0, for vars, 0.0 vs 0 if (node[0] === 'num' && node[1].toString().indexOf('.') >= 0) return ASM_DOUBLE; if (node[0] === 'unary-prefix') return ASM_DOUBLE; if (node[0] === 'call' && node[1][0] === 'name' && node[1][1] === 'Math_fround') return ASM_FLOAT; if (asmInfo && node[0] == 'name') return getAsmType(node[1], asmInfo); - if (node[0] === 'name') return ASM_NONE; + if (node[0] === 'name') { + if (!inVarDef) return ASM_NONE; + // We are in a variable definition, where Math_fround(0) optimized into a global constant becomes f0 = Math_fround(0) + if (!ASM_FLOAT_ZERO) ASM_FLOAT_ZERO = node[1]; + else assert(ASM_FLOAT_ZERO === node[1]); + return ASM_FLOAT; + } return ASM_INT; } @@ -1366,7 +1374,13 @@ function makeAsmVarDef(v, type) { switch (type) { case ASM_INT: return [v, ['num', 0]]; case ASM_DOUBLE: return [v, ['unary-prefix', '+', ['num', 0]]]; - case ASM_FLOAT: return [v, ['call', ['name', 'Math_fround'], [['num', 0]]]]; + case ASM_FLOAT: { + if (ASM_FLOAT_ZERO) { + return [v, ['name', ASM_FLOAT_ZERO]]; + } else { + return [v, ['call', ['name', 'Math_fround'], [['num', 0]]]]; + } + } default: throw 'wha? ' + JSON.stringify([node, type]) + new Error().stack; } } @@ -1409,9 +1423,7 @@ function normalizeAsm(func) { var name = v[0]; var value = v[1]; if (!(name in data.vars)) { - assert(value[0] === 'num' || (value[0] === 'unary-prefix' && value[2][0] === 'num') // must be valid coercion no-op - || (value[0] === 'call' && value[1][0] === 'name' && value[1][1] === 'Math_fround')); - data.vars[name] = detectAsmCoercion(value); + data.vars[name] = detectAsmCoercion(value, null, true); v.length = 1; // make an un-assigning var } else { assert(j === 0, 'cannot break in the middle'); @@ -1425,22 +1437,6 @@ function normalizeAsm(func) { traverse(stats[i], function(node, type) { if (type === 'var') { assert(0, 'should be no vars to fix! ' + func[1] + ' : ' + JSON.stringify(node)); - /* - for (var j = 0; j < node[1].length; j++) { - var v = node[1][j]; - var name = v[0]; - var value = v[1]; - if (!(name in data.vars)) { - if (value[0] != 'name') { - data.vars[name] = detectAsmCoercion(value); // detect by coercion - } else { - var origin = value[1]; - data.vars[name] = data.vars[origin] || ASM_INT; // detect by origin variable, or assume int for non-locals - } - } - } - unVarify(node[1], node); - */ } else if (type === 'call' && node[1][0] === 'function') { assert(!node[1][1]); // anonymous functions only data.inlines.push(node[1]); @@ -3721,7 +3717,7 @@ function minifyGlobals(ast) { var first = true; // do not minify initial 'var asm =' // find the globals traverse(ast, function(node, type) { - if (type === 'var') { + if (type === 'var' || type === 'const') { if (first) { first = false; return; @@ -4971,10 +4967,19 @@ function safeHeap(ast) { function optimizeFrounds(ast) { // collapse fround(fround(..)), which can happen due to elimination + // also emit f0 instead of fround(0) (except in returns) + var inReturn = false; function fix(node) { + if (node[0] === 'return') inReturn = true; traverseChildren(node, fix); - if (node[0] === 'call' && node[1][0] === 'name' && node[1][1] === 'Math_fround' && node[2][0][0] === 'call' && node[2][0][1][0] === 'name' && node[2][0][1][1] === 'Math_fround') { - return node[2][0]; + if (node[0] === 'return') inReturn = false; + if (node[0] === 'call' && node[1][0] === 'name' && node[1][1] === 'Math_fround') { + var arg = node[2][0]; + if (arg[0] === 'num') { + if (!inReturn && arg[1] === 0) return ['name', 'f0']; + } else if (arg[0] === 'call' && arg[1][0] === 'name' && arg[1][1] === 'Math_fround') { + return arg; + } } } traverseChildren(ast, fix); diff --git a/tools/test-js-optimizer-asm-pre-f32.js b/tools/test-js-optimizer-asm-pre-f32.js index 5471deeb8..be515b363 100644 --- a/tools/test-js-optimizer-asm-pre-f32.js +++ b/tools/test-js-optimizer-asm-pre-f32.js @@ -14,4 +14,10 @@ function dupe() { x = Math_fround(Math_fround(Math_fround(x))); x = Math_fround(Math_fround(Math_fround(Math_fround(x)))); } -// EMSCRIPTEN_GENERATED_FUNCTIONS: ["badf", "badf2", "dupe"] +function zeros(x) { + x = Math_fround(x); + var y = Math_fround(0); + print(Math_fround(y) + Math_fround(0)); + return Math_fround(0); // return needs to stay as is +} +// EMSCRIPTEN_GENERATED_FUNCTIONS: ["badf", "badf2", "dupe", "zeros"] diff --git a/tools/test-js-optimizer-asm-pre-output-f32.js b/tools/test-js-optimizer-asm-pre-output-f32.js index 19059619e..f0f2d0dae 100644 --- a/tools/test-js-optimizer-asm-pre-output-f32.js +++ b/tools/test-js-optimizer-asm-pre-output-f32.js @@ -4,7 +4,7 @@ function badf() { HEAP32[$gep23_asptr >> 2] = $9; } function badf2() { - var $9 = Math_fround(0); + var $9 = f0; $9 = Math_fround($8); HEAPF32[$gep23_asptr >> 2] = $9; } @@ -14,4 +14,10 @@ function dupe() { x = Math_fround(x); x = Math_fround(x); } +function zeros(x) { + x = Math_fround(x); + var y = f0; + print(Math_fround(y) + f0); + return Math_fround(0); +}