rewrite variable analysis code. eliminate unused variables without side effects, and improve nativization logic.
This commit is contained in:
Родитель
ad34dff9ae
Коммит
6259358a5a
115
src/analyzer.js
115
src/analyzer.js
|
@ -18,6 +18,7 @@ function cleanFunc(func) {
|
|||
// Handy sets
|
||||
|
||||
var BRANCH_INVOKE = set('branch', 'invoke');
|
||||
var SIDE_EFFECT_CAUSERS = set('call', 'invoke');
|
||||
|
||||
// Analyzer
|
||||
|
||||
|
@ -265,8 +266,6 @@ function analyzer(data) {
|
|||
// Function locals
|
||||
|
||||
item.functions.forEach(function(func) {
|
||||
dprint('vars', 'Analyzing variables in ' + func.ident);
|
||||
|
||||
func.variables = {};
|
||||
|
||||
// LLVM is SSA, so we always have a single assignment/write. We care about
|
||||
|
@ -280,22 +279,21 @@ function analyzer(data) {
|
|||
type: param.type,
|
||||
origin: 'funcparam',
|
||||
lineNum: func.lineNum,
|
||||
uses: null
|
||||
rawLinesIndex: -1
|
||||
};
|
||||
}
|
||||
});
|
||||
|
||||
// Normal variables
|
||||
func.lines.forEach(function(item) {
|
||||
func.lines.forEach(function(item, i) {
|
||||
if (item.intertype === 'assign') {
|
||||
var variable = func.variables[item.ident] = {
|
||||
ident: item.ident,
|
||||
type: item.value.type,
|
||||
origin: item.value.intertype,
|
||||
lineNum: item.lineNum,
|
||||
uses: item.uses
|
||||
rawLinesIndex: i
|
||||
};
|
||||
assert(isNumber(variable.uses), 'Failed to find the # of uses of var: ' + item.ident);
|
||||
if (variable.origin === 'alloca') {
|
||||
variable.allocatedNum = item.value.allocatedNum;
|
||||
}
|
||||
|
@ -324,41 +322,74 @@ function analyzer(data) {
|
|||
});
|
||||
}
|
||||
|
||||
// Analyze variable uses
|
||||
|
||||
// Recomputes the usage statistics of every entry in func.variables:
//   - uses:           number of non-'assign' items whose ident names the variable
//   - hasValueTaken:  true if any such item is something other than a load/store
//   - pointingLevels: pointingLevels() of the variable's type
// Items visited while inside a 'noop' item are not counted. Takes no arguments
// and returns nothing; it reads func from the enclosing scope and mutates
// func.variables in place, so it can be re-run after lines are turned into noops.
function analyzeVariableUses() {
  dprint('vars', 'Analyzing variables for ' + func.ident + '\n');

  // Declare the loop variable locally - without 'var' it would leak as an implicit global
  for (var vname in func.variables) {
    var variable = func.variables[vname];

    // Whether the value itself is used. For an int, always yes. For a pointer,
    // we might never use the pointer's value - we might always just store to it /
    // read from it. If so, then we can optimize away the pointer.
    variable.hasValueTaken = false;

    variable.pointingLevels = pointingLevels(variable.type);

    // Reset the counter; it is rebuilt from scratch by the walk below
    variable.uses = 0;
  }

  // TODO: improve the analysis precision. bitcast, for example, means we take the value, but perhaps we only use it to load/store
  // Depth of enclosing 'noop' items at the current point of the walk
  // (presumably walkInterdata calls the second callback once an item's children are done - TODO confirm)
  var inNoop = 0;
  func.lines.forEach(function(line) {
    walkInterdata(line, function(item) {
      if (item.intertype === 'noop') inNoop++;
      if (!inNoop) {
        // The defining 'assign' of a variable is not a use of it
        if (item.ident in func.variables && item.intertype !== 'assign') {
          var used = func.variables[item.ident];
          used.uses++;

          if (item.intertype !== 'load' && item.intertype !== 'store') {
            used.hasValueTaken = true;
          }
        }
      }
    }, function(item) {
      if (item.intertype === 'noop') inNoop--;
    });
  });
}
|
||||
|
||||
// Filter out no longer used variables, collapsing more as we go
|
||||
while (true) {
|
||||
analyzeVariableUses();
|
||||
|
||||
var recalc = false;
|
||||
|
||||
keys(func.variables).forEach(function(vname) {
|
||||
var variable = func.variables[vname];
|
||||
if (variable.uses == 0 && variable.origin != 'funcparam') {
|
||||
// Eliminate this variable if we can
|
||||
var sideEffects = false;
|
||||
walkInterdata(func.lines[variable.rawLinesIndex].value, function(item) {
|
||||
if (item.intertype in SIDE_EFFECT_CAUSERS) sideEffects = true;
|
||||
});
|
||||
if (!sideEffects) {
|
||||
dprint('vars', 'Eliminating ' + vname);
|
||||
func.lines[variable.rawLinesIndex].intertype = func.lines[variable.rawLinesIndex].value.intertype = 'noop';
|
||||
delete func.variables[vname];
|
||||
recalc = true;
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
if (!recalc) break;
|
||||
}
|
||||
|
||||
// Decision time
|
||||
|
||||
for (vname in func.variables) {
|
||||
var variable = func.variables[vname];
|
||||
|
||||
// Whether the value itself is used. For an int, always yes. For a pointer,
|
||||
// we might never use the pointer's value - we might always just store to it /
|
||||
// read from it. If so, then we can optimize away the pointer.
|
||||
variable.hasValueTaken = false;
|
||||
// Whether our address was used. If not, then we do not need to bother with
|
||||
// implementing this variable in a way that other functions can access it.
|
||||
variable.hasAddrTaken = false;
|
||||
|
||||
variable.pointingLevels = pointingLevels(variable.type);
|
||||
|
||||
// Analysis!
|
||||
|
||||
if (variable.pointingLevels > 0) {
|
||||
// Pointers
|
||||
variable.loads = 0;
|
||||
variable.stores = 0;
|
||||
|
||||
func.lines.forEach(function(line) {
|
||||
if (line.intertype == 'store' && line.ident == vname) {
|
||||
variable.stores ++;
|
||||
} else if (line.intertype == 'assign' && line.value.intertype == 'load' && line.value.ident == vname) {
|
||||
variable.loads ++;
|
||||
}
|
||||
});
|
||||
|
||||
variable.otherUses = variable.uses - variable.loads - variable.stores;
|
||||
if (variable.otherUses > 0)
|
||||
variable.hasValueTaken = true;
|
||||
}
|
||||
|
||||
// Decision time
|
||||
|
||||
var pointedType = pointingLevels(variable.type) > 0 ? removePointing(variable.type) : null;
|
||||
if (variable.origin == 'getelementptr') {
|
||||
// Use our implementation that emulates pointers etc.
|
||||
|
@ -369,10 +400,10 @@ function analyzer(data) {
|
|||
variable.impl = VAR_EMULATED;
|
||||
} else if (variable.type == 'i64*' && I64_MODE == 1) {
|
||||
variable.impl = VAR_EMULATED;
|
||||
} else if (MICRO_OPTS && variable.pointingLevels === 0 && !variable.hasAddrTaken) {
|
||||
} else if (MICRO_OPTS && variable.pointingLevels === 0) {
|
||||
// A simple int value, can be implemented as a native variable
|
||||
variable.impl = VAR_NATIVE;
|
||||
} else if (MICRO_OPTS && variable.origin === 'alloca' && !variable.hasAddrTaken && !variable.hasValueTaken &&
|
||||
} else if (MICRO_OPTS && variable.origin === 'alloca' && !variable.hasValueTaken &&
|
||||
variable.allocatedNum === 1 &&
|
||||
(Runtime.isNumberType(pointedType) || Runtime.isPointerType(pointedType))) {
|
||||
// A pointer to a value which is only accessible through this pointer. Basically
|
||||
|
@ -423,7 +454,7 @@ function analyzer(data) {
|
|||
item.functions.forEach(function(func) {
|
||||
func.lines.forEach(function(line, i) {
|
||||
if (line.intertype === 'assign' && line.value.intertype === 'load') {
|
||||
var data = func.variables[line.ident]
|
||||
var data = func.variables[line.ident];
|
||||
if (data.type === 'i1') {
|
||||
line.value.unsigned = true;
|
||||
return;
|
||||
|
|
|
@ -180,9 +180,10 @@ if (FAKE_X86_FP80) {
|
|||
var lines = raw.split('\n');
|
||||
raw = null;
|
||||
|
||||
// Parse metadata
|
||||
// Pre-process the LLVM assembly
|
||||
|
||||
Debugging.handleMetadata(lines);
|
||||
PreProcessor.eliminateUnneededIntrinsics(lines);
|
||||
|
||||
// Do it
|
||||
|
||||
|
|
|
@ -520,7 +520,7 @@ function intertyper(data, sidePass, baseLineNums) {
|
|||
});
|
||||
}
|
||||
} else {
|
||||
if (!item.tokens[3]) throw 'Did you run llvm-dis with -show-annotations? (b)';
|
||||
if (!item.tokens[3]) throw 'Did you run llvm-dis with -show-annotations? (b)'; // XXX: do we still need annotations?
|
||||
if (item.tokens[3].text == 'c')
|
||||
item.tokens.splice(3, 1);
|
||||
if (item.tokens[3].text in PARSABLE_LLVM_FUNCTIONS) {
|
||||
|
@ -572,13 +572,11 @@ function intertyper(data, sidePass, baseLineNums) {
|
|||
substrate.addActor('Assign', {
|
||||
processItem: function(item) {
|
||||
var opIndex = findTokenText(item, '=');
|
||||
if (!item.tokens.slice(-1)[0].item) throw 'Did you run llvm-dis with -show-annotations?';
|
||||
var commentIndex = getTokenIndexByText(item.tokens, ';');
|
||||
var pair = splitItem({
|
||||
intertype: 'assign',
|
||||
ident: toNiceIdent(combineTokens(item.tokens.slice(0, opIndex)).text),
|
||||
lineNum: item.lineNum,
|
||||
uses: parseInt(item.tokens[commentIndex+1].item.tokens[0].text.split('=')[1])
|
||||
}, 'value');
|
||||
this.forwardItem(pair.parent, 'Reintegrator');
|
||||
pair.child.indent = -1;
|
||||
|
@ -972,7 +970,7 @@ function intertyper(data, sidePass, baseLineNums) {
|
|||
processItem: function(item) {
|
||||
var ret = {
|
||||
intertype: 'indirectbr',
|
||||
pointer: parseLLVMSegment(splitTokenList(item.tokens.slice(1))[0]),
|
||||
value: parseLLVMSegment(splitTokenList(item.tokens.slice(1))[0]),
|
||||
type: item.tokens[1].text,
|
||||
lineNum: item.lineNum
|
||||
};
|
||||
|
|
|
@ -706,6 +706,9 @@ function JSify(data, functionsOnly, givenFunctions) {
|
|||
}
|
||||
});
|
||||
}
|
||||
makeFuncLineActor('noop', function(item) {
|
||||
return ';';
|
||||
});
|
||||
makeFuncLineActor('var', function(item) { // assigns into phis become simple vars when MICRO_OPTS
|
||||
return 'var ' + item.ident + ';';
|
||||
});
|
||||
|
@ -953,7 +956,7 @@ function JSify(data, functionsOnly, givenFunctions) {
|
|||
return ret + item.ident + '.f' + item.indexes[0][0].text + ' = ' + finalizeLLVMParameter(item.value) + ', ' + item.ident + ')';
|
||||
});
|
||||
makeFuncLineActor('indirectbr', function(item) {
|
||||
return makeBranch(finalizeLLVMParameter(item.pointer), item.currLabelId, true);
|
||||
return makeBranch(finalizeLLVMParameter(item.value), item.currLabelId, true);
|
||||
});
|
||||
makeFuncLineActor('alloca', function(item) {
|
||||
if (typeof item.allocatedIndex === 'number') {
|
||||
|
|
|
@ -209,6 +209,28 @@ var Debugging = {
|
|||
}
|
||||
};
|
||||
|
||||
// Textual clean-ups applied to the raw LLVM assembly lines.
var PreProcessor = {
  // Replaces, in place, every line that calls @llvm.lifetime.start or
  // @llvm.lifetime.end on a numbered temporary (e.g. %13) with a bare ';'.
  //
  // LLVM sometimes aggressively adds lifetime annotations, for example
  //
  //    %0 = bitcast %"class.std::__1::__tree"** %this.addr.i to i8* ; [#uses=1 type=i8*]
  //    call void @llvm.lifetime.start(i64 -1, i8* %0) nounwind
  //    [..]
  //    %6 = bitcast float* %__x.addr.i to i8* ; [#uses=1 type=i8*]
  //    call void @llvm.lifetime.end(i64 -1, i8* %6) nounwind
  //
  // This greatly hurts us if we do not eliminate it ahead of time, because while we
  // will correctly do nothing for the lifetime intrinsic itself, the bitcast of the
  // parameter to it will prevent nativization of the variable being cast (!)
  //
  // @param lines Array of LLVM assembly source lines; modified in place.
  //              The array length is unchanged and nothing is returned.
  eliminateUnneededIntrinsics: function(lines) {
    // The dots are escaped so that only the literal intrinsic name matches
    var lifetimeCall = /call void @llvm\.lifetime\.(?:start|end)\(i\d+ -1, i8\* %\d+\)/;
    for (var i = 0; i < lines.length; i++) {
      if (lifetimeCall.test(lines[i])) {
        lines[i] = ';';
      }
    }
  }
};
|
||||
|
||||
var Variables = {
|
||||
globals: {}
|
||||
};
|
||||
|
|
|
@ -410,7 +410,7 @@ function parseLLVMSegment(segment) {
|
|||
Types.needAnalysis[type] = 0;
|
||||
return {
|
||||
intertype: 'structvalue',
|
||||
values: splitTokenList(segment[1].tokens).map(parseLLVMSegment),
|
||||
params: splitTokenList(segment[1].tokens).map(parseLLVMSegment),
|
||||
type: type
|
||||
};
|
||||
} else if (segment[0].text in PARSABLE_LLVM_FUNCTIONS) {
|
||||
|
@ -1458,7 +1458,7 @@ function finalizeLLVMParameter(param, noIndexizeFunctions) {
|
|||
}
|
||||
ret = parseNumerical(ret);
|
||||
} else if (param.intertype == 'structvalue') {
|
||||
ret = makeLLVMStruct(param.values.map(function(value) { return finalizeLLVMParameter(value, noIndexizeFunctions) }));
|
||||
ret = makeLLVMStruct(param.params.map(function(value) { return finalizeLLVMParameter(value, noIndexizeFunctions) }));
|
||||
} else if (param.intertype === 'blockaddress') {
|
||||
return finalizeBlockAddress(param);
|
||||
} else if (param.intertype === 'type') {
|
||||
|
@ -1793,7 +1793,7 @@ function processMathop(item) {
|
|||
|
||||
// Walks through some intertype data, calling a function at every item. If
|
||||
// the function returns true, will stop the walk.
|
||||
// TODO: Use this in analyzer, possibly also in jsifier
|
||||
// TODO: Use this more in analyzer, possibly also in jsifier
|
||||
function walkInterdata(item, pre, post, obj) {
|
||||
if (!item || !item.intertype) return false;
|
||||
if (pre && pre(item, obj)) return true;
|
||||
|
|
|
@ -0,0 +1,42 @@
|
|||
; ModuleID = '/tmp/emscripten/tmp/src.cpp.o'
|
||||
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32"
|
||||
target triple = "i386-pc-linux-gnu"
|
||||
|
||||
%struct.vec2 = type { float, float }
|
||||
|
||||
@.str = private unnamed_addr constant [15 x i8] c"hello, world!\0A\00" ; [#uses=1]
|
||||
|
||||
; [#uses=1]
|
||||
declare i32 @printf(i8* noalias, ...)
|
||||
|
||||
define linkonce_odr float @vec2Length(%struct.vec2* %this) nounwind align 2 {
|
||||
entry:
|
||||
%__first.addr.i = alloca %struct.b2Pair.5*, align 4 ; [#uses=3 type=%struct.b2Pair.5**]
|
||||
%__last.addr.i = alloca %struct.b2Pair.5*, align 4 ; [#uses=3 type=%struct.b2Pair.5**]
|
||||
%__comp.addr.i = alloca %struct.b2Pair.5*, align 4 ; [#uses=2 type=%struct.b2Pair.5**]
|
||||
%13 = bitcast %struct.vec2** %__first.addr.i to i8* ; [#uses=1 type=i8*]
|
||||
call void @llvm.lifetime.start(i64 -1, i8* %13)
|
||||
%14 = bitcast %struct.vec2** %__last.addr.i to i8* ; [#uses=1 type=i8*]
|
||||
call void @llvm.lifetime.start(i64 -1, i8* %14)
|
||||
%15 = bitcast i1 (%struct.vec2*, %struct.vec2*)** %__comp.addr.i to i8* ; [#uses=1 type=i8*]
|
||||
call void @llvm.lifetime.start(i64 -1, i8* %15)
|
||||
store %struct.vec2* %10, %struct.vec2** %__first.addr.i, align 4
|
||||
store %struct.vec2* %add.ptr, %struct.vec2** %__last.addr.i, align 4
|
||||
%18 = bitcast %struct.vec2** %__first.addr.i to i8* ; [#uses=1 type=i8*]
|
||||
call void @llvm.lifetime.end(i64 -1, i8* %18)
|
||||
%19 = bitcast %struct.vec2** %__last.addr.i to i8* ; [#uses=1 type=i8*]
|
||||
call void @llvm.lifetime.end(i64 -1, i8* %19)
|
||||
%20 = bitcast i1 (%struct.vec2*, %struct.vec2*)** %__comp.addr.i to i8* ; [#uses=1 type=i8*]
|
||||
call void @llvm.lifetime.end(i64 -1, i8* %20)
|
||||
}
|
||||
|
||||
define i32 @main() {
|
||||
entry:
|
||||
%retval = alloca i32, align 4 ; [#uses=1]
|
||||
store i32 0, i32* %retval
|
||||
%b = getelementptr inbounds i32* %retval, i32 0, i32 1 ; [#uses=1] ; force __stackBase__ to appear!
|
||||
%call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([15 x i8]* @.str, i32 0, i32 0)) ; [#uses=0]
|
||||
call i32 (i32)* @nonexistant(i32 %b) ; keep %b alive
|
||||
ret i32 0
|
||||
}
|
||||
|
|
@ -0,0 +1,6 @@
|
|||
if Settings.MICRO_OPTS:
|
||||
assert '__stackBase__' in generated, 'There should be some stack activity (without which, we cannot do the next checks)'
|
||||
assert '__stackBase__+4' not in generated, 'All variables should have been nativized'
|
||||
assert '__stackBase__+8' not in generated, 'All variables should have been nativized'
|
||||
assert 'comp_addr' not in generated, 'This variable should have been eliminated during analysis'
|
||||
|
|
@ -3895,7 +3895,7 @@ at function.:blag
|
|||
|
||||
for name in glob.glob(path_from_root('tests', 'cases', '*.ll')):
|
||||
shortname = name.replace('.ll', '')
|
||||
#if 'break' not in shortname: continue
|
||||
if '' not in shortname: continue
|
||||
print "Testing case '%s'..." % shortname
|
||||
output_file = path_from_root('tests', 'cases', shortname + '.txt')
|
||||
if Settings.QUANTUM_SIZE == 1:
|
||||
|
@ -3908,6 +3908,11 @@ at function.:blag
|
|||
output = 'hello, world!'
|
||||
if output.rstrip() != 'skip':
|
||||
self.do_ll_run(path_from_root('tests', 'cases', name), output)
|
||||
# Optional source checking, a python script that gets a global generated with the source
|
||||
src_checker = path_from_root('tests', 'cases', shortname + '.py')
|
||||
if os.path.exists(src_checker):
|
||||
generated = open('src.cpp.o.js').read()
|
||||
exec(open(src_checker).read())
|
||||
|
||||
# Autodebug the code
|
||||
def do_autodebug(self, filename):
|
||||
|
|
Загрузка…
Ссылка в новой задаче