emit a global const for Math_fround(0) to avoid function call overheads in the fround polyfill
This commit is contained in:
Родитель
2308587a85
Коммит
77aa2a72f3
2
emcc
2
emcc
|
@ -1727,7 +1727,7 @@ try:
|
|||
# with commaified code breaks late aggressive variable elimination)
|
||||
if shared.Settings.SIMPLIFY_IFS and (debug_level == 0 or profiling) and shared.Settings.OUTLINING_LIMIT == 0: js_optimizer_queue += ['simplifyIfs']
|
||||
|
||||
if opt_level >= 3 and shared.Settings.PRECISE_F32: js_optimizer_queue += ['optimizeFrounds']
|
||||
if shared.Settings.PRECISE_F32: js_optimizer_queue += ['optimizeFrounds']
|
||||
|
||||
if closure and not shared.Settings.ASM_JS:
|
||||
flush_js_optimizer_queue()
|
||||
|
|
|
@ -1165,7 +1165,7 @@ var asm = (function(global, env, buffer) {
|
|||
var nan = +env.NaN, inf = +env.Infinity;
|
||||
var tempInt = 0, tempBigInt = 0, tempBigIntP = 0, tempBigIntS = 0, tempBigIntR = 0.0, tempBigIntI = 0, tempBigIntD = 0, tempValue = 0, tempDouble = 0.0;
|
||||
''' + ''.join(['''
|
||||
var tempRet%d = 0;''' % i for i in range(10)]) + '\n' + asm_global_funcs] + [' var tempFloat = %s;\n' % ('Math_fround(0)' if settings.get('PRECISE_F32') else '0.0')] + ['''
|
||||
var tempRet%d = 0;''' % i for i in range(10)]) + '\n' + asm_global_funcs] + [' var tempFloat = %s;\n' % ('Math_fround(0)' if settings.get('PRECISE_F32') else '0.0')] + ([' const f0 = Math_fround(0);\n'] if settings.get('PRECISE_F32') else []) + ['''
|
||||
// EMSCRIPTEN_START_FUNCS
|
||||
function stackAlloc(size) {
|
||||
size = size|0;
|
||||
|
|
|
@ -124,13 +124,20 @@ var PRECISE_I32_MUL = 1; // If enabled, i32 multiplication is done with full pre
|
|||
var PRECISE_F32 = 0; // 0: Use JS numbers for floating-point values. These are 64-bit and do not model C++
|
||||
// floats exactly, which are 32-bit.
|
||||
// 1: Model C++ floats precisely, using Math.fround, polyfilling when necessary. This
|
||||
// can be slow if the polyfill is used on heavy float32 computation.
|
||||
// can be slow if the polyfill is used on heavy float32 computation. See note on
|
||||
// browser support below.
|
||||
// 2: Model C++ floats precisely using Math.fround if available in the JS engine, otherwise
|
||||
// use an empty polyfill. This will have much less of a speed penalty than using the full
|
||||
// polyfill in cases where engine support is not present. In addition, we can
|
||||
// remove the empty polyfill calls themselves on the client when generating html,
|
||||
// which should mean that this gives you the best of both worlds of 0 and 1, and is
|
||||
// therefore recommended.
|
||||
// XXX Note: To optimize float32-using code, we use the 'const' keyword in the emitted
|
||||
// code. This allows us to avoid unnecessary calls to Math.fround, which would
|
||||
// slow down engines not yet supporting that function. 'const' is present in
|
||||
// all modern browsers, including Firefox, Chrome and Safari, but in IE it is only
|
||||
// present in IE11 and above. Therefore if you need to support legacy versions of
|
||||
// IE, you should not set PRECISE_F32 to 1 or 2.
|
||||
var SIMD = 0; // Whether to emit SIMD code ( https://github.com/johnmccutchan/ecmascript_simd )
|
||||
|
||||
var CLOSURE_COMPILER = 0; // Whether closure compiling is being run on this output
|
||||
|
|
|
@ -1342,13 +1342,21 @@ var ASM_DOUBLE = 1;
|
|||
var ASM_FLOAT = 2;
|
||||
var ASM_NONE = 3;
|
||||
|
||||
function detectAsmCoercion(node, asmInfo) {
|
||||
var ASM_FLOAT_ZERO = null; // TODO: share the entire node?
|
||||
|
||||
function detectAsmCoercion(node, asmInfo, inVarDef) {
|
||||
// for params, +x vs x|0, for vars, 0.0 vs 0
|
||||
if (node[0] === 'num' && node[1].toString().indexOf('.') >= 0) return ASM_DOUBLE;
|
||||
if (node[0] === 'unary-prefix') return ASM_DOUBLE;
|
||||
if (node[0] === 'call' && node[1][0] === 'name' && node[1][1] === 'Math_fround') return ASM_FLOAT;
|
||||
if (asmInfo && node[0] == 'name') return getAsmType(node[1], asmInfo);
|
||||
if (node[0] === 'name') return ASM_NONE;
|
||||
if (node[0] === 'name') {
|
||||
if (!inVarDef) return ASM_NONE;
|
||||
// We are in a variable definition. The only bare name allowed as an initializer here is the global float-zero constant (f0 = Math_fround(0)) that replaces Math_fround(0), so treat it as a float
|
||||
if (!ASM_FLOAT_ZERO) ASM_FLOAT_ZERO = node[1];
|
||||
else assert(ASM_FLOAT_ZERO === node[1]);
|
||||
return ASM_FLOAT;
|
||||
}
|
||||
return ASM_INT;
|
||||
}
|
||||
|
||||
|
@ -1366,7 +1374,13 @@ function makeAsmVarDef(v, type) {
|
|||
switch (type) {
|
||||
case ASM_INT: return [v, ['num', 0]];
|
||||
case ASM_DOUBLE: return [v, ['unary-prefix', '+', ['num', 0]]];
|
||||
case ASM_FLOAT: return [v, ['call', ['name', 'Math_fround'], [['num', 0]]]];
|
||||
case ASM_FLOAT: {
|
||||
if (ASM_FLOAT_ZERO) {
|
||||
return [v, ['name', ASM_FLOAT_ZERO]];
|
||||
} else {
|
||||
return [v, ['call', ['name', 'Math_fround'], [['num', 0]]]];
|
||||
}
|
||||
}
|
||||
default: throw 'wha? ' + JSON.stringify([node, type]) + new Error().stack;
|
||||
}
|
||||
}
|
||||
|
@ -1409,9 +1423,7 @@ function normalizeAsm(func) {
|
|||
var name = v[0];
|
||||
var value = v[1];
|
||||
if (!(name in data.vars)) {
|
||||
assert(value[0] === 'num' || (value[0] === 'unary-prefix' && value[2][0] === 'num') // must be valid coercion no-op
|
||||
|| (value[0] === 'call' && value[1][0] === 'name' && value[1][1] === 'Math_fround'));
|
||||
data.vars[name] = detectAsmCoercion(value);
|
||||
data.vars[name] = detectAsmCoercion(value, null, true);
|
||||
v.length = 1; // make an un-assigning var
|
||||
} else {
|
||||
assert(j === 0, 'cannot break in the middle');
|
||||
|
@ -1425,22 +1437,6 @@ function normalizeAsm(func) {
|
|||
traverse(stats[i], function(node, type) {
|
||||
if (type === 'var') {
|
||||
assert(0, 'should be no vars to fix! ' + func[1] + ' : ' + JSON.stringify(node));
|
||||
/*
|
||||
for (var j = 0; j < node[1].length; j++) {
|
||||
var v = node[1][j];
|
||||
var name = v[0];
|
||||
var value = v[1];
|
||||
if (!(name in data.vars)) {
|
||||
if (value[0] != 'name') {
|
||||
data.vars[name] = detectAsmCoercion(value); // detect by coercion
|
||||
} else {
|
||||
var origin = value[1];
|
||||
data.vars[name] = data.vars[origin] || ASM_INT; // detect by origin variable, or assume int for non-locals
|
||||
}
|
||||
}
|
||||
}
|
||||
unVarify(node[1], node);
|
||||
*/
|
||||
} else if (type === 'call' && node[1][0] === 'function') {
|
||||
assert(!node[1][1]); // anonymous functions only
|
||||
data.inlines.push(node[1]);
|
||||
|
@ -3721,7 +3717,7 @@ function minifyGlobals(ast) {
|
|||
var first = true; // do not minify initial 'var asm ='
|
||||
// find the globals
|
||||
traverse(ast, function(node, type) {
|
||||
if (type === 'var') {
|
||||
if (type === 'var' || type === 'const') {
|
||||
if (first) {
|
||||
first = false;
|
||||
return;
|
||||
|
@ -4971,10 +4967,19 @@ function safeHeap(ast) {
|
|||
|
||||
function optimizeFrounds(ast) {
|
||||
// collapse fround(fround(..)), which can happen due to elimination
|
||||
// also emit f0 instead of fround(0) (except in returns, which must keep the explicit Math_fround(0) call)
|
||||
var inReturn = false;
|
||||
function fix(node) {
|
||||
if (node[0] === 'return') inReturn = true;
|
||||
traverseChildren(node, fix);
|
||||
if (node[0] === 'call' && node[1][0] === 'name' && node[1][1] === 'Math_fround' && node[2][0][0] === 'call' && node[2][0][1][0] === 'name' && node[2][0][1][1] === 'Math_fround') {
|
||||
return node[2][0];
|
||||
if (node[0] === 'return') inReturn = false;
|
||||
if (node[0] === 'call' && node[1][0] === 'name' && node[1][1] === 'Math_fround') {
|
||||
var arg = node[2][0];
|
||||
if (arg[0] === 'num') {
|
||||
if (!inReturn && arg[1] === 0) return ['name', 'f0'];
|
||||
} else if (arg[0] === 'call' && arg[1][0] === 'name' && arg[1][1] === 'Math_fround') {
|
||||
return arg;
|
||||
}
|
||||
}
|
||||
}
|
||||
traverseChildren(ast, fix);
|
||||
|
|
|
@ -14,4 +14,10 @@ function dupe() {
|
|||
x = Math_fround(Math_fround(Math_fround(x)));
|
||||
x = Math_fround(Math_fround(Math_fround(Math_fround(x))));
|
||||
}
|
||||
// EMSCRIPTEN_GENERATED_FUNCTIONS: ["badf", "badf2", "dupe"]
|
||||
function zeros(x) {
|
||||
x = Math_fround(x);
|
||||
var y = Math_fround(0);
|
||||
print(Math_fround(y) + Math_fround(0));
|
||||
return Math_fround(0); // return needs to stay as is
|
||||
}
|
||||
// EMSCRIPTEN_GENERATED_FUNCTIONS: ["badf", "badf2", "dupe", "zeros"]
|
||||
|
|
|
@ -4,7 +4,7 @@ function badf() {
|
|||
HEAP32[$gep23_asptr >> 2] = $9;
|
||||
}
|
||||
function badf2() {
|
||||
var $9 = Math_fround(0);
|
||||
var $9 = f0;
|
||||
$9 = Math_fround($8);
|
||||
HEAPF32[$gep23_asptr >> 2] = $9;
|
||||
}
|
||||
|
@ -14,4 +14,10 @@ function dupe() {
|
|||
x = Math_fround(x);
|
||||
x = Math_fround(x);
|
||||
}
|
||||
function zeros(x) {
|
||||
x = Math_fround(x);
|
||||
var y = f0;
|
||||
print(Math_fround(y) + f0);
|
||||
return Math_fround(0);
|
||||
}
|
||||
|
||||
|
|
Загрузка…
Ссылка в новой задаче