diff --git a/src/analyzer.js b/src/analyzer.js index b43711d15..a7e38a519 100644 --- a/src/analyzer.js +++ b/src/analyzer.js @@ -18,6 +18,7 @@ function cleanFunc(func) { // Handy sets var BRANCH_INVOKE = set('branch', 'invoke'); +var SIDE_EFFECT_CAUSERS = set('call', 'invoke'); // Analyzer @@ -265,8 +266,6 @@ function analyzer(data) { // Function locals item.functions.forEach(function(func) { - dprint('vars', 'Analyzing variables in ' + func.ident); - func.variables = {}; // LLVM is SSA, so we always have a single assignment/write. We care about @@ -280,22 +279,21 @@ function analyzer(data) { type: param.type, origin: 'funcparam', lineNum: func.lineNum, - uses: null + rawLinesIndex: -1 }; } }); // Normal variables - func.lines.forEach(function(item) { + func.lines.forEach(function(item, i) { if (item.intertype === 'assign') { var variable = func.variables[item.ident] = { ident: item.ident, type: item.value.type, origin: item.value.intertype, lineNum: item.lineNum, - uses: item.uses + rawLinesIndex: i }; - assert(isNumber(variable.uses), 'Failed to find the # of uses of var: ' + item.ident); if (variable.origin === 'alloca') { variable.allocatedNum = item.value.allocatedNum; } @@ -324,41 +322,74 @@ function analyzer(data) { }); } + // Analyze variable uses + + function analyzeVariableUses() { + dprint('vars', 'Analyzing variables for ' + func.ident + '\n'); + + for (vname in func.variables) { + var variable = func.variables[vname]; + + // Whether the value itself is used. For an int, always yes. For a pointer, + // we might never use the pointer's value - we might always just store to it / + // read from it. If so, then we can optimize away the pointer. + variable.hasValueTaken = false; + + variable.pointingLevels = pointingLevels(variable.type); + + variable.uses = 0; + } + + // TODO: improve the analysis precision. bitcast, for example, means we take the value, but perhaps we only use it to load/store + var inNoop = 0; + func.lines.forEach(function(line) { + walkInterdata(line, function(item) { + if (item.intertype == 'noop') inNoop++; + if (!inNoop) { + if (item.ident in func.variables && item.intertype != 'assign') { + func.variables[item.ident].uses++; + + if (item.intertype != 'load' && item.intertype != 'store') { + func.variables[item.ident].hasValueTaken = true; + } + } + } + }, function(item) { + if (item.intertype == 'noop') inNoop--; + }); + }); + } + + // Filter out no longer used variables, collapsing more as we go + while (true) { + analyzeVariableUses(); + + var recalc = false; + + keys(func.variables).forEach(function(vname) { + var variable = func.variables[vname]; + if (variable.uses == 0 && variable.origin != 'funcparam') { + // Eliminate this variable if we can + var sideEffects = false; + walkInterdata(func.lines[variable.rawLinesIndex].value, function(item) { + if (item.intertype in SIDE_EFFECT_CAUSERS) sideEffects = true; + }); + if (!sideEffects) { + dprint('vars', 'Eliminating ' + vname); + func.lines[variable.rawLinesIndex].intertype = func.lines[variable.rawLinesIndex].value.intertype = 'noop'; + delete func.variables[vname]; + recalc = true; + } + } + }); + + if (!recalc) break; + } + + // Decision time + for (vname in func.variables) { var variable = func.variables[vname]; - - // Whether the value itself is used. For an int, always yes. For a pointer, - // we might never use the pointer's value - we might always just store to it / - // read from it. If so, then we can optimize away the pointer. - variable.hasValueTaken = false; - // Whether our address was used. If not, then we do not need to bother with - // implementing this variable in a way that other functions can access it. - variable.hasAddrTaken = false; - - variable.pointingLevels = pointingLevels(variable.type); - - // Analysis! - - if (variable.pointingLevels > 0) { - // Pointers - variable.loads = 0; - variable.stores = 0; - - func.lines.forEach(function(line) { - if (line.intertype == 'store' && line.ident == vname) { - variable.stores ++; - } else if (line.intertype == 'assign' && line.value.intertype == 'load' && line.value.ident == vname) { - variable.loads ++; - } - }); - - variable.otherUses = variable.uses - variable.loads - variable.stores; - if (variable.otherUses > 0) - variable.hasValueTaken = true; - } - - // Decision time - var pointedType = pointingLevels(variable.type) > 0 ? removePointing(variable.type) : null; if (variable.origin == 'getelementptr') { // Use our implementation that emulates pointers etc. @@ -369,10 +400,10 @@ function analyzer(data) { variable.impl = VAR_EMULATED; } else if (variable.type == 'i64*' && I64_MODE == 1) { variable.impl = VAR_EMULATED; - } else if (MICRO_OPTS && variable.pointingLevels === 0 && !variable.hasAddrTaken) { + } else if (MICRO_OPTS && variable.pointingLevels === 0) { // A simple int value, can be implemented as a native variable variable.impl = VAR_NATIVE; - } else if (MICRO_OPTS && variable.origin === 'alloca' && !variable.hasAddrTaken && !variable.hasValueTaken && + } else if (MICRO_OPTS && variable.origin === 'alloca' && !variable.hasValueTaken && variable.allocatedNum === 1 && (Runtime.isNumberType(pointedType) || Runtime.isPointerType(pointedType))) { // A pointer to a value which is only accessible through this pointer. Basically @@ -423,7 +454,7 @@ function analyzer(data) { item.functions.forEach(function(func) { func.lines.forEach(function(line, i) { if (line.intertype === 'assign' && line.value.intertype === 'load') { - var data = func.variables[line.ident] + var data = func.variables[line.ident]; if (data.type === 'i1') { line.value.unsigned = true; return; diff --git a/src/compiler.js b/src/compiler.js index 7e3839b65..2dea57979 100644 --- a/src/compiler.js +++ b/src/compiler.js @@ -180,9 +180,10 @@ if (FAKE_X86_FP80) { var lines = raw.split('\n'); raw = null; -// Parse metadata +// Pre-process the LLVM assembly Debugging.handleMetadata(lines); +PreProcessor.eliminateUnneededIntrinsics(lines); // Do it diff --git a/src/intertyper.js b/src/intertyper.js index 341b2e1d7..ba9e3f0d2 100644 --- a/src/intertyper.js +++ b/src/intertyper.js @@ -520,7 +520,7 @@ function intertyper(data, sidePass, baseLineNums) { }); } } else { - if (!item.tokens[3]) throw 'Did you run llvm-dis with -show-annotations? (b)'; + if (!item.tokens[3]) throw 'Did you run llvm-dis with -show-annotations? (b)'; // XXX: do we still need annotations? if (item.tokens[3].text == 'c') item.tokens.splice(3, 1); if (item.tokens[3].text in PARSABLE_LLVM_FUNCTIONS) { @@ -572,13 +572,11 @@ function intertyper(data, sidePass, baseLineNums) { substrate.addActor('Assign', { processItem: function(item) { var opIndex = findTokenText(item, '='); - if (!item.tokens.slice(-1)[0].item) throw 'Did you run llvm-dis with -show-annotations?'; var commentIndex = getTokenIndexByText(item.tokens, ';'); var pair = splitItem({ intertype: 'assign', ident: toNiceIdent(combineTokens(item.tokens.slice(0, opIndex)).text), lineNum: item.lineNum, - uses: parseInt(item.tokens[commentIndex+1].item.tokens[0].text.split('=')[1]) }, 'value'); this.forwardItem(pair.parent, 'Reintegrator'); pair.child.indent = -1; @@ -972,7 +970,7 @@ function intertyper(data, sidePass, baseLineNums) { processItem: function(item) { var ret = { intertype: 'indirectbr', - pointer: parseLLVMSegment(splitTokenList(item.tokens.slice(1))[0]), + value: parseLLVMSegment(splitTokenList(item.tokens.slice(1))[0]), type: item.tokens[1].text, lineNum: item.lineNum }; diff --git a/src/jsifier.js b/src/jsifier.js index 984604929..0f90ade1b 100644 --- a/src/jsifier.js +++ b/src/jsifier.js @@ -706,6 +706,9 @@ function JSify(data, functionsOnly, givenFunctions) { } }); } + makeFuncLineActor('noop', function(item) { + return ';'; + }); makeFuncLineActor('var', function(item) { // assigns into phis become simple vars when MICRO_OPTS return 'var ' + item.ident + ';'; }); @@ -953,7 +956,7 @@ function JSify(data, functionsOnly, givenFunctions) { return ret + item.ident + '.f' + item.indexes[0][0].text + ' = ' + finalizeLLVMParameter(item.value) + ', ' + item.ident + ')'; }); makeFuncLineActor('indirectbr', function(item) { - return makeBranch(finalizeLLVMParameter(item.pointer), item.currLabelId, true); + return makeBranch(finalizeLLVMParameter(item.value), item.currLabelId, true); }); makeFuncLineActor('alloca', function(item) { if (typeof item.allocatedIndex === 'number') { diff --git a/src/modules.js b/src/modules.js index 49f93e616..0bc8894c1 100644 --- a/src/modules.js +++ b/src/modules.js @@ -209,6 +209,28 @@ var Debugging = { } }; +var PreProcessor = { + eliminateUnneededIntrinsics: function(lines) { + // LLVM sometimes aggresively adds lifetime annotations, for example + // + // %0 = bitcast %"class.std::__1::__tree"** %this.addr.i to i8* ; [#uses=1 type=i8*] + // call void @llvm.lifetime.start(i64 -1, i8* %0) nounwind + // [..] + // %6 = bitcast float* %__x.addr.i to i8* ; [#uses=1 type=i8*] + // call void @llvm.lifetime.end(i64 -1, i8* %6) nounwind + // + // This greatly hurts us if we do not eliminate it ahead of time, because while we + // will correctly do nothing for the lifetime intrinsic itself, the bitcast of the + // parameter to it will prevent nativization of the variable being cast (!) + for (var i = 0; i < lines.length; i++) { + var line = lines[i]; + if (/call void @llvm.lifetime.(start|end)\(i\d+ -1, i8\* %(\d+)\).*/.exec(line)) { + lines[i] = ';'; + } + } + } +}; + var Variables = { globals: {} }; diff --git a/src/parseTools.js b/src/parseTools.js index d0193a12a..0b9d8f2f3 100644 --- a/src/parseTools.js +++ b/src/parseTools.js @@ -410,7 +410,7 @@ function parseLLVMSegment(segment) { Types.needAnalysis[type] = 0; return { intertype: 'structvalue', - values: splitTokenList(segment[1].tokens).map(parseLLVMSegment), + params: splitTokenList(segment[1].tokens).map(parseLLVMSegment), type: type }; } else if (segment[0].text in PARSABLE_LLVM_FUNCTIONS) { @@ -1458,7 +1458,7 @@ function finalizeLLVMParameter(param, noIndexizeFunctions) { } ret = parseNumerical(ret); } else if (param.intertype == 'structvalue') { - ret = makeLLVMStruct(param.values.map(function(value) { return finalizeLLVMParameter(value, noIndexizeFunctions) })); + ret = makeLLVMStruct(param.params.map(function(value) { return finalizeLLVMParameter(value, noIndexizeFunctions) })); } else if (param.intertype === 'blockaddress') { return finalizeBlockAddress(param); } else if (param.intertype === 'type') { @@ -1793,7 +1793,7 @@ function processMathop(item) { // Walks through some intertype data, calling a function at every item. If // the function returns true, will stop the walk. -// TODO: Use this in analyzer, possibly also in jsifier +// TODO: Use this more in analyzer, possibly also in jsifier function walkInterdata(item, pre, post, obj) { if (!item || !item.intertype) return false; if (pre && pre(item, obj)) return true; diff --git a/tests/cases/lifetime.ll b/tests/cases/lifetime.ll new file mode 100644 index 000000000..dc6d471db --- /dev/null +++ b/tests/cases/lifetime.ll @@ -0,0 +1,42 @@ +; ModuleID = '/tmp/emscripten/tmp/src.cpp.o' +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32" +target triple = "i386-pc-linux-gnu" + +%struct.vec2 = type { float, float } + +@.str = private unnamed_addr constant [15 x i8] c"hello, world!\0A\00" ; [#uses=1] + +; [#uses=1] +declare i32 @printf(i8* noalias, ...) + +define linkonce_odr float @vec2Length(%struct.vec2* %this) nounwind align 2 { +entry: + %__first.addr.i = alloca %struct.b2Pair.5*, align 4 ; [#uses=3 type=%struct.b2Pair.5**] + %__last.addr.i = alloca %struct.b2Pair.5*, align 4 ; [#uses=3 type=%struct.b2Pair.5**] + %__comp.addr.i = alloca %struct.b2Pair.5*, align 4 ; [#uses=2 type=%struct.b2Pair.5**] + %13 = bitcast %struct.vec2** %__first.addr.i to i8* ; [#uses=1 type=i8*] + call void @llvm.lifetime.start(i64 -1, i8* %13) + %14 = bitcast %struct.vec2** %__last.addr.i to i8* ; [#uses=1 type=i8*] + call void @llvm.lifetime.start(i64 -1, i8* %14) + %15 = bitcast i1 (%struct.vec2*, %struct.vec2*)** %__comp.addr.i to i8* ; [#uses=1 type=i8*] + call void @llvm.lifetime.start(i64 -1, i8* %15) + store %struct.vec2* %10, %struct.vec2** %__first.addr.i, align 4 + store %struct.vec2* %add.ptr, %struct.vec2** %__last.addr.i, align 4 + %18 = bitcast %struct.vec2** %__first.addr.i to i8* ; [#uses=1 type=i8*] + call void @llvm.lifetime.end(i64 -1, i8* %18) + %19 = bitcast %struct.vec2** %__last.addr.i to i8* ; [#uses=1 type=i8*] + call void @llvm.lifetime.end(i64 -1, i8* %19) + %20 = bitcast i1 (%struct.vec2*, %struct.vec2*)** %__comp.addr.i to i8* ; [#uses=1 type=i8*] + call void @llvm.lifetime.end(i64 -1, i8* %20) +} + +define i32 @main() { +entry: + %retval = alloca i32, align 4 ; [#uses=1] + store i32 0, i32* %retval + %b = getelementptr inbounds i32* %retval, i32 0, i32 1 ; [#uses=1] ; force __stackBase__ to appear! + %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([15 x i8]* @.str, i32 0, i32 0)) ; [#uses=0] + call i32 (i32)* @nonexistant(i32 %b) ; keep %b alive + ret i32 0 +} + diff --git a/tests/cases/lifetime.py b/tests/cases/lifetime.py new file mode 100644 index 000000000..3bb9cbacc --- /dev/null +++ b/tests/cases/lifetime.py @@ -0,0 +1,6 @@ +if Settings.MICRO_OPTS: + assert '__stackBase__' in generated, 'There should be some stack activity (without which, we cannot do the next checks)' + assert '__stackBase__+4' not in generated, 'All variables should have been nativized' + assert '__stackBase__+8' not in generated, 'All variables should have been nativized' + assert 'comp_addr' not in generated, 'This variable should have been eliminated during analysis' + diff --git a/tests/runner.py b/tests/runner.py index c8f232371..e908d2948 100644 --- a/tests/runner.py +++ b/tests/runner.py @@ -3895,7 +3895,7 @@ at function.:blag for name in glob.glob(path_from_root('tests', 'cases', '*.ll')): shortname = name.replace('.ll', '') - #if 'break' not in shortname: continue + if '' not in shortname: continue print "Testing case '%s'..." % shortname output_file = path_from_root('tests', 'cases', shortname + '.txt') if Settings.QUANTUM_SIZE == 1: @@ -3908,6 +3908,11 @@ at function.:blag output = 'hello, world!' if output.rstrip() != 'skip': self.do_ll_run(path_from_root('tests', 'cases', name), output) + # Optional source checking, a python script that gets a global generated with the source + src_checker = path_from_root('tests', 'cases', shortname + '.py') + if os.path.exists(src_checker): + generated = open('src.cpp.o.js').read() + exec(open(src_checker).read()) # Autodebug the code def do_autodebug(self, filename):