emit a global const for Math_fround(0) to avoid function call overheads in the fround polyfill

This commit is contained in:
Alon Zakai 2014-05-03 12:09:37 -07:00
Родитель 2308587a85
Коммит 77aa2a72f3
6 изменённых файлов: 54 добавлений и 30 удалений

2
emcc
Просмотреть файл

@ -1727,7 +1727,7 @@ try:
# with commaified code breaks late aggressive variable elimination)
if shared.Settings.SIMPLIFY_IFS and (debug_level == 0 or profiling) and shared.Settings.OUTLINING_LIMIT == 0: js_optimizer_queue += ['simplifyIfs']
if opt_level >= 3 and shared.Settings.PRECISE_F32: js_optimizer_queue += ['optimizeFrounds']
if shared.Settings.PRECISE_F32: js_optimizer_queue += ['optimizeFrounds']
if closure and not shared.Settings.ASM_JS:
flush_js_optimizer_queue()

Просмотреть файл

@ -1165,7 +1165,7 @@ var asm = (function(global, env, buffer) {
var nan = +env.NaN, inf = +env.Infinity;
var tempInt = 0, tempBigInt = 0, tempBigIntP = 0, tempBigIntS = 0, tempBigIntR = 0.0, tempBigIntI = 0, tempBigIntD = 0, tempValue = 0, tempDouble = 0.0;
''' + ''.join(['''
var tempRet%d = 0;''' % i for i in range(10)]) + '\n' + asm_global_funcs] + [' var tempFloat = %s;\n' % ('Math_fround(0)' if settings.get('PRECISE_F32') else '0.0')] + ['''
var tempRet%d = 0;''' % i for i in range(10)]) + '\n' + asm_global_funcs] + [' var tempFloat = %s;\n' % ('Math_fround(0)' if settings.get('PRECISE_F32') else '0.0')] + ([' const f0 = Math_fround(0);\n'] if settings.get('PRECISE_F32') else []) + ['''
// EMSCRIPTEN_START_FUNCS
function stackAlloc(size) {
size = size|0;

Просмотреть файл

@ -124,13 +124,20 @@ var PRECISE_I32_MUL = 1; // If enabled, i32 multiplication is done with full pre
var PRECISE_F32 = 0; // 0: Use JS numbers for floating-point values. These are 64-bit and do not model C++
// floats exactly, which are 32-bit.
// 1: Model C++ floats precisely, using Math.fround, polyfilling when necessary. This
// can be slow if the polyfill is used on heavy float32 computation.
// can be slow if the polyfill is used on heavy float32 computation. See note on
// browser support below.
// 2: Model C++ floats precisely using Math.fround if available in the JS engine, otherwise
// use an empty polyfill. This will have much less of a speed penalty than using the full
// polyfill in cases where engine support is not present. In addition, we can
// remove the empty polyfill calls themselves on the client when generating html,
// which should mean that this gives you the best of both worlds of 0 and 1, and is
// therefore recommended.
// XXX Note: To optimize float32-using code, we use the 'const' keyword in the emitted
// code. This allows us to avoid unnecessary calls to Math.fround, which would
// slow down engines not yet supporting that function. 'const' is supported in
// all modern browsers, including Firefox, Chrome and Safari, but in IE it is only
// supported in IE11 and above. Therefore, if you need to support legacy versions of
// IE, you should not set PRECISE_F32 to 1 or 2.
var SIMD = 0; // Whether to emit SIMD code ( https://github.com/johnmccutchan/ecmascript_simd )
var CLOSURE_COMPILER = 0; // Whether closure compiling is being run on this output

Просмотреть файл

@ -1342,13 +1342,21 @@ var ASM_DOUBLE = 1;
var ASM_FLOAT = 2;
var ASM_NONE = 3;
function detectAsmCoercion(node, asmInfo) {
var ASM_FLOAT_ZERO = null; // TODO: share the entire node?
function detectAsmCoercion(node, asmInfo, inVarDef) {
// for params, +x vs x|0, for vars, 0.0 vs 0
if (node[0] === 'num' && node[1].toString().indexOf('.') >= 0) return ASM_DOUBLE;
if (node[0] === 'unary-prefix') return ASM_DOUBLE;
if (node[0] === 'call' && node[1][0] === 'name' && node[1][1] === 'Math_fround') return ASM_FLOAT;
if (asmInfo && node[0] == 'name') return getAsmType(node[1], asmInfo);
if (node[0] === 'name') return ASM_NONE;
if (node[0] === 'name') {
if (!inVarDef) return ASM_NONE;
// We are in a variable definition, where a Math_fround(0) initializer may have been optimized into a reference to the global constant (emitted as f0 = Math_fround(0))
if (!ASM_FLOAT_ZERO) ASM_FLOAT_ZERO = node[1];
else assert(ASM_FLOAT_ZERO === node[1]);
return ASM_FLOAT;
}
return ASM_INT;
}
@ -1366,7 +1374,13 @@ function makeAsmVarDef(v, type) {
switch (type) {
case ASM_INT: return [v, ['num', 0]];
case ASM_DOUBLE: return [v, ['unary-prefix', '+', ['num', 0]]];
case ASM_FLOAT: return [v, ['call', ['name', 'Math_fround'], [['num', 0]]]];
case ASM_FLOAT: {
if (ASM_FLOAT_ZERO) {
return [v, ['name', ASM_FLOAT_ZERO]];
} else {
return [v, ['call', ['name', 'Math_fround'], [['num', 0]]]];
}
}
default: throw 'wha? ' + JSON.stringify([node, type]) + new Error().stack;
}
}
@ -1409,9 +1423,7 @@ function normalizeAsm(func) {
var name = v[0];
var value = v[1];
if (!(name in data.vars)) {
assert(value[0] === 'num' || (value[0] === 'unary-prefix' && value[2][0] === 'num') // must be valid coercion no-op
|| (value[0] === 'call' && value[1][0] === 'name' && value[1][1] === 'Math_fround'));
data.vars[name] = detectAsmCoercion(value);
data.vars[name] = detectAsmCoercion(value, null, true);
v.length = 1; // make an un-assigning var
} else {
assert(j === 0, 'cannot break in the middle');
@ -1425,22 +1437,6 @@ function normalizeAsm(func) {
traverse(stats[i], function(node, type) {
if (type === 'var') {
assert(0, 'should be no vars to fix! ' + func[1] + ' : ' + JSON.stringify(node));
/*
for (var j = 0; j < node[1].length; j++) {
var v = node[1][j];
var name = v[0];
var value = v[1];
if (!(name in data.vars)) {
if (value[0] != 'name') {
data.vars[name] = detectAsmCoercion(value); // detect by coercion
} else {
var origin = value[1];
data.vars[name] = data.vars[origin] || ASM_INT; // detect by origin variable, or assume int for non-locals
}
}
}
unVarify(node[1], node);
*/
} else if (type === 'call' && node[1][0] === 'function') {
assert(!node[1][1]); // anonymous functions only
data.inlines.push(node[1]);
@ -3721,7 +3717,7 @@ function minifyGlobals(ast) {
var first = true; // do not minify initial 'var asm ='
// find the globals
traverse(ast, function(node, type) {
if (type === 'var') {
if (type === 'var' || type === 'const') {
if (first) {
first = false;
return;
@ -4971,10 +4967,19 @@ function safeHeap(ast) {
function optimizeFrounds(ast) {
// collapse fround(fround(..)), which can happen due to elimination
// also emit the global constant f0 instead of Math_fround(0) (except inside return statements, which must stay as they are)
var inReturn = false;
function fix(node) {
if (node[0] === 'return') inReturn = true;
traverseChildren(node, fix);
if (node[0] === 'call' && node[1][0] === 'name' && node[1][1] === 'Math_fround' && node[2][0][0] === 'call' && node[2][0][1][0] === 'name' && node[2][0][1][1] === 'Math_fround') {
return node[2][0];
if (node[0] === 'return') inReturn = false;
if (node[0] === 'call' && node[1][0] === 'name' && node[1][1] === 'Math_fround') {
var arg = node[2][0];
if (arg[0] === 'num') {
if (!inReturn && arg[1] === 0) return ['name', 'f0'];
} else if (arg[0] === 'call' && arg[1][0] === 'name' && arg[1][1] === 'Math_fround') {
return arg;
}
}
}
traverseChildren(ast, fix);

Просмотреть файл

@ -14,4 +14,10 @@ function dupe() {
x = Math_fround(Math_fround(Math_fround(x)));
x = Math_fround(Math_fround(Math_fround(Math_fround(x))));
}
// EMSCRIPTEN_GENERATED_FUNCTIONS: ["badf", "badf2", "dupe"]
// Test input for the Math_fround(0) handling in optimizeFrounds: the
// Math_fround(0) in the var initializer and the one inside the print()
// expression are expected to be rewritten to the global constant f0, while
// the one in the return statement is expected to be left untouched.
function zeros(x) {
x = Math_fround(x);
var y = Math_fround(0);
print(Math_fround(y) + Math_fround(0));
return Math_fround(0); // return needs to stay as is
}
// EMSCRIPTEN_GENERATED_FUNCTIONS: ["badf", "badf2", "dupe", "zeros"]

Просмотреть файл

@ -4,7 +4,7 @@ function badf() {
HEAP32[$gep23_asptr >> 2] = $9;
}
function badf2() {
var $9 = Math_fround(0);
var $9 = f0;
$9 = Math_fround($8);
HEAPF32[$gep23_asptr >> 2] = $9;
}
@ -14,4 +14,10 @@ function dupe() {
x = Math_fround(x);
x = Math_fround(x);
}
function zeros(x) {
x = Math_fround(x);
var y = f0;
print(Math_fround(y) + f0);
return Math_fround(0);
}