Output pseudo-code in generic decompiler using generic IR names

This commit is contained in:
HoLLy 2019-11-22 21:56:11 +01:00
Родитель 92b5f05677
Коммит 7b97f0d37a
6 изменённых файлов: 152 добавлений и 16 удалений

Просмотреть файл

@ -5,9 +5,9 @@ using System.IO;
using System.Linq;
using Rivers;
using Rivers.Analysis;
using Rivers.Serialization.Dot;
using WasmLib.Decompilation.Intermediate;
using WasmLib.Decompilation.Intermediate.Graph;
using WasmLib.Decompilation.SourceCode;
using WasmLib.FileFormat;
namespace WasmLib.Decompilation
@ -27,12 +27,22 @@ namespace WasmLib.Decompilation
FunctionSignature signature = WasmModule.FunctionTypes[WasmModule.Functions[functionIndex]];
List<IntermediateInstruction> instructions = new IntermediateConverter(WasmModule, body, signature).Convert();
// TODO: add return instruction to intermediate
// TODO: add graph node for global state, locals or parameters?
Graph graph = CreateGraph(instructions);
// TODO: remove subtrees with no side effects?
OutputAsCode(graph, output);
// var writer = new DotWriter(output);
// writer.Write(graph);
}
private static Graph CreateGraph(IEnumerable<IntermediateInstruction> instructions)
{
var graph = new Graph();
var stack = new Stack<(InstructionNode, ValueKind)>();
int instructionNum = 0;
foreach (IntermediateInstruction instruction in instructions) {
if (instruction.HasBlock) {
@ -42,32 +52,81 @@ namespace WasmLib.Decompilation
InstructionNode node = new InstructionNode(instruction, instructionNum++);
graph.Nodes.Add(node);
Debug.Assert(instruction.PushCount <= 1, "Instruction pushed multiple variables to stack, which shouldn't happen.");
for (int i = 0; i < instruction.PopCount; i++) {
(InstructionNode sourceInstruction, ValueKind type) = stack.Pop();
Debug.Assert(type == instruction.PopTypes[i]);
sourceInstruction.OutgoingEdges.Add(new StackVariableEdge(sourceInstruction, node, type));
}
for (int i = 0; i < instruction.PushCount; i++) {
stack.Push((node, instruction.PushTypes[i]));
}
// if this instruction is not pure, add a dependency on the last impure instruction, if any
// NOTE: this could possibly be optimized by having different kinds of impurity
if (!instruction.IsPure) {
InstructionNode? dependentInstruction = graph.Nodes.Cast<InstructionNode>().Reverse().Skip(1).FirstOrDefault(x => !x.Instruction.IsPure);
// TODO: edges not required anymore for decompiled code output?
if (!node.IsPure) {
InstructionNode? dependentInstruction = graph.Nodes.Cast<InstructionNode>().Reverse().Skip(1).FirstOrDefault(x => !x.IsPure);
dependentInstruction?.OutgoingEdges.Add(new ImpurityDependencyEdge(dependentInstruction, node));
}
}
// this assert seems to fail, perhaps write own version in the future
// Debug.Assert(!graph.IsCyclic(), "Got cyclic dependency in function!");
Console.WriteLine(graph.IsCyclic());
// TODO: get all nodes with no outgoing edges, write them out (taking into account side effects?)
// TODO: remove trees with no side effects
var writer = new DotWriter(output, new DefaultUserDataSerializer());
writer.Write(graph);
// BUG: see Washi1337/Rivers#6
// Debug.Assert(!graph.IsCyclic(), "Got cyclic dependency in function!");
if (!graph.IsConnected()) {
throw new NotImplementedException();
}
return graph;
}
/// <summary>
/// Writes the instruction graph to <paramref name="output"/> as pseudo-code, one expression per line,
/// in ascending instruction-index order.
/// </summary>
/// <remarks>
/// A value consumed by an instruction is inlined into that instruction when no impure instruction
/// lies strictly between the producer and the consumer. Otherwise the producer is turned into an
/// assignment to a generated, per-type numbered variable and the consumer references that variable.
/// </remarks>
/// <param name="graph">Graph whose <see cref="InstructionNode"/>s are emitted.</param>
/// <param name="output">Writer that receives the generated lines.</param>
private static void OutputAsCode(Graph graph, TextWriter output)
{
    // next free variable number for each value type
    var varCounts = new Dictionary<ValueKind, int> {
        {ValueKind.I32, 0},
        {ValueKind.I64, 0},
        {ValueKind.F32, 0},
        {ValueKind.F64, 0},
    };

    // Keyed by instruction index. Entries are removed (when inlined) and added while building, and a
    // plain Dictionary gives no ordering guarantee in that case, so a sorted map is used to make sure
    // the statements are written in instruction order.
    var statements = new SortedDictionary<int, Expression>();

    // the graph is not modified here, so the node list and the impure indices can be computed once
    var instructionNodes = graph.Nodes.OfType<InstructionNode>().ToArray();
    var impureIndices = instructionNodes.Where(x => !x.IsPure).Select(x => x.Index).ToArray();

    foreach (var currentNode in instructionNodes) {
        var parameterEdges = currentNode.IncomingVariableEdges.ToArray();

        // instruction consumes nothing: emit it without a parameter list
        if (parameterEdges.Length == 0) {
            statements[currentNode.Index] = new GenericExpression(currentNode.Instruction);
            continue;
        }

        // for each dependency, check if it can be reached in a pure way
        // if so, inline it
        // if not, create intermediary statements
        var parameters = new Expression[parameterEdges.Length];
        for (int i = 0; i < parameterEdges.Length; i++) {
            var edge = parameterEdges[i];
            var variableNode = edge.Source;

            // pure means: no impure instruction strictly between producer and consumer
            var isPure = !impureIndices.Any(x => x > variableNode.Index && x < currentNode.Index);

            if (isPure) { // TODO: and instruction can be inlined
                parameters[i] = statements[variableNode.Index];
                statements.Remove(variableNode.Index);
            }
            else {
                var assignment = new AssignmentExpression(statements[variableNode.Index], edge.Type, varCounts[edge.Type]++);
                statements[variableNode.Index] = assignment;
                parameters[i] = assignment.Reference;
            }
        }

        statements[currentNode.Index] = new GenericExpression(currentNode.Instruction, parameters);
    }

    foreach (Expression expression in statements.Values) {
        output.WriteLine(expression.GetStringRepresentation());
    }
}
}
}

Просмотреть файл

@ -1,4 +1,6 @@
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using Rivers;
namespace WasmLib.Decompilation.Intermediate.Graph
@ -6,10 +8,18 @@ namespace WasmLib.Decompilation.Intermediate.Graph
public class InstructionNode : Node
{
/// <summary>The intermediate instruction wrapped by this node.</summary>
public IntermediateInstruction Instruction { get; }
/// <summary>The index given to the constructor; it is also used, hex-formatted, in the string passed to the base constructor.</summary>
public int Index { get; }
/// <summary>Shortcut for <c>Instruction.IsPure</c>.</summary>
public bool IsPure => Instruction.IsPure;
/// <summary>The outgoing edges of this node that are <see cref="ImpurityDependencyEdge"/>s.</summary>
public IEnumerable<ImpurityDependencyEdge> OutgoingImpurityEdges => OutgoingEdges.OfType<ImpurityDependencyEdge>();
/// <summary>The incoming edges of this node that are <see cref="ImpurityDependencyEdge"/>s.</summary>
public IEnumerable<ImpurityDependencyEdge> IncomingImpurityEdges => IncomingEdges.OfType<ImpurityDependencyEdge>();
/// <summary>The outgoing edges of this node that are <see cref="StackVariableEdge"/>s.</summary>
public IEnumerable<StackVariableEdge> OutgoingVariableEdges => OutgoingEdges.OfType<StackVariableEdge>();
/// <summary>The incoming edges of this node that are <see cref="StackVariableEdge"/>s.</summary>
public IEnumerable<StackVariableEdge> IncomingVariableEdges => IncomingEdges.OfType<StackVariableEdge>();
/// <summary>
/// Creates a node for <paramref name="instruction"/>, passing <c>_XXXX</c> (the index as at least
/// four upper-case hex digits) to the base <c>Node</c> constructor.
/// </summary>
/// <param name="instruction">Instruction to wrap.</param>
/// <param name="idx">Index stored in <see cref="Index"/>.</param>
public InstructionNode(IntermediateInstruction instruction, int idx) : base($"_{idx:X4}")
{
Instruction = instruction;
Index = idx;
// NOTE(review): AddUserData is defined outside the visible part of this class; presumably it
// reads the properties set above, so it is kept as the last call - confirm.
AddUserData();
}
@ -23,5 +33,7 @@ namespace WasmLib.Decompilation.Intermediate.Graph
? $" {instructionString} "
: instructionString;
}
/// <summary>Returns the string form of the wrapped <see cref="Instruction"/>.</summary>
public override string ToString()
{
    return Instruction.ToString();
}
}
}

Просмотреть файл

@ -0,0 +1,21 @@
using WasmLib.FileFormat;
using WasmLib.Utils;
namespace WasmLib.Decompilation.SourceCode
{
/// <summary>
/// Expression that assigns another expression to a generated variable and exposes a
/// <see cref="VariableReferenceExpression"/> for referring to that variable.
/// </summary>
public class AssignmentExpression : Expression
{
    /// <summary>The expression whose value is assigned.</summary>
    public Expression BaseExpression { get; }

    /// <summary>Reference expression carrying <see cref="Name"/>.</summary>
    public VariableReferenceExpression Reference { get; }

    /// <summary>Variable name, built as the description of the value type, an underscore and the index.</summary>
    public string Name { get; }

    /// <summary>Creates an assignment of <paramref name="baseExpression"/> to a variable named after its type and index.</summary>
    /// <param name="baseExpression">Expression on the right-hand side.</param>
    /// <param name="type">Value type; its description forms the first part of the variable name.</param>
    /// <param name="index">Number forming the second part of the variable name.</param>
    public AssignmentExpression(Expression baseExpression, ValueKind type, int index)
    {
        var typeLabel = EnumUtils.GetDescription(type);

        Name = $"{typeLabel}_{index}";
        Reference = new VariableReferenceExpression(Name);
        BaseExpression = baseExpression;
    }

    /// <summary>Renders the assignment as the variable reference, <c> = </c>, and the assigned expression.</summary>
    public override string GetStringRepresentation()
    {
        string target = Reference.GetStringRepresentation();
        return $"{target} = {BaseExpression}";
    }
}
}

Просмотреть файл

@ -0,0 +1,9 @@
namespace WasmLib.Decompilation.SourceCode
{
/// <summary>Base class for the source-code expressions produced by the decompiler.</summary>
public abstract class Expression
{
    /// <summary>Returns this expression rendered as text.</summary>
    public abstract string GetStringRepresentation();

    /// <summary>Returns the same text as <see cref="GetStringRepresentation"/>.</summary>
    public override string ToString()
    {
        return GetStringRepresentation();
    }
}
}

Просмотреть файл

@ -0,0 +1,21 @@
using System.Linq;
using WasmLib.Decompilation.Intermediate;
namespace WasmLib.Decompilation.SourceCode
{
/// <summary>
/// Expression that renders an intermediate instruction by its string form, optionally followed by a
/// parenthesised parameter list.
/// </summary>
public class GenericExpression : Expression
{
    /// <summary>The instruction this expression renders.</summary>
    public IntermediateInstruction BaseInstruction { get; }

    /// <summary>The parameter expressions, or <c>null</c> when none were supplied.</summary>
    public Expression[]? Parameters { get; }

    /// <summary>Creates an expression for <paramref name="baseInstruction"/>.</summary>
    /// <param name="baseInstruction">Instruction to render.</param>
    /// <param name="parameters">Parameter expressions; rendered in reverse array order. May be <c>null</c>.</param>
    public GenericExpression(IntermediateInstruction baseInstruction, Expression[]? parameters = null)
    {
        Parameters = parameters;
        BaseInstruction = baseInstruction;
    }

    /// <summary>
    /// Returns the instruction's string form alone when <see cref="Parameters"/> is <c>null</c>;
    /// otherwise appends the parameters, reversed and comma-separated, in parentheses.
    /// </summary>
    public override string GetStringRepresentation()
    {
        if (Parameters is null) {
            return BaseInstruction.ToString();
        }

        // lazily evaluated; nothing is rendered until string.Join enumerates it below
        var renderedArguments = Enumerable.Reverse(Parameters).Select(argument => argument.GetStringRepresentation());
        return $"{BaseInstruction}({string.Join(", ", renderedArguments)})";
    }
}
}

Просмотреть файл

@ -0,0 +1,14 @@
namespace WasmLib.Decompilation.SourceCode
{
/// <summary>Expression that renders as the name of a variable.</summary>
public class VariableReferenceExpression : Expression
{
    /// <summary>The name of the referenced variable.</summary>
    public string Name { get; }

    /// <summary>Creates a reference to the variable called <paramref name="name"/>.</summary>
    /// <param name="name">Name of the variable.</param>
    public VariableReferenceExpression(string name) => Name = name;

    /// <summary>Returns <see cref="Name"/>.</summary>
    public override string GetStringRepresentation()
    {
        return Name;
    }
}
}