diff --git a/WasmLib/Decompilation/GenericDecompiler.cs b/WasmLib/Decompilation/GenericDecompiler.cs
index 0785855..39b1a1e 100644
--- a/WasmLib/Decompilation/GenericDecompiler.cs
+++ b/WasmLib/Decompilation/GenericDecompiler.cs
@@ -5,9 +5,9 @@ using System.IO;
 using System.Linq;
 using Rivers;
 using Rivers.Analysis;
-using Rivers.Serialization.Dot;
 using WasmLib.Decompilation.Intermediate;
 using WasmLib.Decompilation.Intermediate.Graph;
+using WasmLib.Decompilation.SourceCode;
 using WasmLib.FileFormat;
 
 namespace WasmLib.Decompilation
@@ -27,12 +27,26 @@ namespace WasmLib.Decompilation
 
             FunctionSignature signature = WasmModule.FunctionTypes[WasmModule.Functions[functionIndex]];
             List<IntermediateInstruction> instructions = new IntermediateConverter(WasmModule, body, signature).Convert();
-            // TODO: add return instruction to intermediate
-            // TODO: add graph node for global state, locals or parameters?
 
+            Graph graph = CreateGraph(instructions);
+
+            // TODO: remove subtrees with no side effects?
+
+            OutputAsCode(graph, output);
+
+            // var writer = new DotWriter(output);
+            // writer.Write(graph);
+        }
+
+        /// <summary>
+        /// Builds a graph with one <see cref="InstructionNode"/> per instruction, linking each stack value's
+        /// producer to its consumer and chaining every impure instruction to the previous impure one.
+        /// </summary>
+        private static Graph CreateGraph(IEnumerable<IntermediateInstruction> instructions)
+        {
             var graph = new Graph();
             var stack = new Stack<(InstructionNode, ValueKind)>();
-            
+
             int instructionNum = 0;
             foreach (IntermediateInstruction instruction in instructions) {
                 if (instruction.HasBlock) {
@@ -42,32 +56,87 @@ namespace WasmLib.Decompilation
                 InstructionNode node = new InstructionNode(instruction, instructionNum++);
                 graph.Nodes.Add(node);
 
+                Debug.Assert(instruction.PushCount <= 1, "Instruction pushed multiple variables to stack, which shouldn't happen.");
                 for (int i = 0; i < instruction.PopCount; i++) {
                     (InstructionNode sourceInstruction, ValueKind type) = stack.Pop();
                     Debug.Assert(type == instruction.PopTypes[i]);
                     sourceInstruction.OutgoingEdges.Add(new StackVariableEdge(sourceInstruction, node, type));
                 }
+
                 for (int i = 0; i < instruction.PushCount; i++) {
                     stack.Push((node, instruction.PushTypes[i]));
                 }
-                
+
                 // if this instruction is not pure, add a dependency on the last impure instruction, if any
                 // NOTE: this could possibly be optimized by having different kinds of impurity
-                if (!instruction.IsPure) {
-                    InstructionNode? dependentInstruction = graph.Nodes.Cast<InstructionNode>().Reverse().Skip(1).FirstOrDefault(x => !x.Instruction.IsPure);
+                // TODO: edges not required anymore for decompiled code output?
+                if (!node.IsPure) {
+                    InstructionNode? dependentInstruction = graph.Nodes.Cast<InstructionNode>().Reverse().Skip(1).FirstOrDefault(x => !x.IsPure);
                     dependentInstruction?.OutgoingEdges.Add(new ImpurityDependencyEdge(dependentInstruction, node));
                 }
             }
 
-            
-            // this assert seems to fail, perhaps write own version in the future
-            // Debug.Assert(!graph.IsCyclic(), "Got cyclic dependency in function!");
-            Console.WriteLine(graph.IsCyclic());
-            
-            // TODO: get all nodes with no outgoing edges, write them out (taking into account side effects?)
-            // TODO: remove trees with no side effects
-            var writer = new DotWriter(output, new DefaultUserDataSerializer());
-            writer.Write(graph);
+            // BUG: see Washi1337/Rivers#6
+            // Debug.Assert(!graph.IsCyclic(), "Got cyclic dependency in function!");
+            if (!graph.IsConnected()) {
+                throw new NotImplementedException("Disconnected instruction graphs are not supported yet.");
+            }
+
+            return graph;
+        }
+
+        /// <summary>
+        /// Writes the graph to <paramref name="output"/> as one statement per line, inlining an operand
+        /// when no impure instruction sits between its producer and its consumer.
+        /// </summary>
+        private static void OutputAsCode(Graph graph, TextWriter output)
+        {
+            var varCounts = new Dictionary<ValueKind, int> {
+                {ValueKind.I32, 0},
+                {ValueKind.I64, 0},
+                {ValueKind.F32, 0},
+                {ValueKind.F64, 0},
+            };
+
+            // keyed by instruction index and kept sorted, so statements are written in instruction order;
+            // a plain Dictionary has no defined enumeration order once entries have been removed
+            var statements = new SortedDictionary<int, Expression>();
+            foreach (var currentNode in graph.Nodes.OfType<InstructionNode>()) {
+                var parameterEdges = currentNode.IncomingVariableEdges.ToArray();
+
+                // only handle if node consumes variables
+                if (parameterEdges.Length > 0) {
+                    // for each dependency, check if it can be reached in a pure way
+                    // if so, inline it
+                    // if not, create intermediary statements
+                    var parameters = new Expression[parameterEdges.Length];
+                    for (int i = 0; i < parameterEdges.Length; i++) {
+                        var edge = parameterEdges[i];
+                        var variableNode = edge.Source;
+                        var isPure = !graph.Nodes.OfType<InstructionNode>().Any(x => !x.IsPure && x.Index > variableNode.Index && x.Index < currentNode.Index);
+
+                        if (isPure) { // TODO: and instruction can be inlined
+                            parameters[i] = statements[variableNode.Index];
+                            statements.Remove(variableNode.Index);
+                        }
+                        else {
+                            var assignment = new AssignmentExpression(statements[variableNode.Index], edge.Type, varCounts[edge.Type]++);
+                            statements[variableNode.Index] = assignment;
+                            parameters[i] = assignment.Reference;
+                        }
+                    }
+
+                    statements[currentNode.Index] = new GenericExpression(currentNode.Instruction, parameters);
+                }
+                else {
+                    statements[currentNode.Index] = new GenericExpression(currentNode.Instruction);
+                }
+
+            }
+
+            foreach (Expression expression in statements.Values) {
+                output.WriteLine(expression.GetStringRepresentation());
+            }
         }
     }
 }
\ No newline at end of file
diff --git a/WasmLib/Decompilation/Intermediate/Graph/InstructionNode.cs b/WasmLib/Decompilation/Intermediate/Graph/InstructionNode.cs
index 18e73bb..5bca9d3 100644
--- a/WasmLib/Decompilation/Intermediate/Graph/InstructionNode.cs
+++ b/WasmLib/Decompilation/Intermediate/Graph/InstructionNode.cs
@@ -1,4 +1,6 @@
+using System.Collections.Generic;
 using System.Diagnostics;
+using System.Linq;
 using Rivers;
 
 namespace WasmLib.Decompilation.Intermediate.Graph
@@ -6,10 +8,21 @@ namespace WasmLib.Decompilation.Intermediate.Graph
     public class InstructionNode : Node
     {
         public IntermediateInstruction Instruction { get; }
+        /// <summary>Sequence number passed to the constructor; also used to build the node name.</summary>
+        public int Index { get; }
+        /// <summary>Whether the wrapped instruction reports itself as pure.</summary>
+        public bool IsPure => Instruction.IsPure;
+
+        // typed views over this node's edges, split by edge kind
+        public IEnumerable<ImpurityDependencyEdge> OutgoingImpurityEdges => OutgoingEdges.OfType<ImpurityDependencyEdge>();
+        public IEnumerable<ImpurityDependencyEdge> IncomingImpurityEdges => IncomingEdges.OfType<ImpurityDependencyEdge>();
+        public IEnumerable<StackVariableEdge> OutgoingVariableEdges => OutgoingEdges.OfType<StackVariableEdge>();
+        public IEnumerable<StackVariableEdge> IncomingVariableEdges => IncomingEdges.OfType<StackVariableEdge>();
 
         public InstructionNode(IntermediateInstruction instruction, int idx) : base($"_{idx:X4}")
         {
             Instruction = instruction;
+            Index = idx;
             AddUserData();
         }
 
@@ -23,5 +36,7 @@ namespace WasmLib.Decompilation.Intermediate.Graph
                 ? $" {instructionString} "
                 : instructionString;
         }
+
+        public override string ToString() => Instruction.ToString();
     }
 }
\ No newline at end of file
diff --git a/WasmLib/Decompilation/SourceCode/AssignmentExpression.cs b/WasmLib/Decompilation/SourceCode/AssignmentExpression.cs
new file mode 100644
index 0000000..97311a9
--- /dev/null
+++ b/WasmLib/Decompilation/SourceCode/AssignmentExpression.cs
@@ -0,0 +1,24 @@
+using WasmLib.FileFormat;
+using WasmLib.Utils;
+
+namespace WasmLib.Decompilation.SourceCode
+{
+    /// <summary>
+    /// Statement that stores an expression in a generated variable named from the value type's description and an index.
+    /// </summary>
+    public class AssignmentExpression : Expression
+    {
+        public Expression BaseExpression { get; }
+        public VariableReferenceExpression Reference { get; }
+        public string Name { get; }
+
+        public AssignmentExpression(Expression baseExpression, ValueKind type, int index)
+        {
+            BaseExpression = baseExpression;
+            Name = $"{EnumUtils.GetDescription(type)}_{index}";
+            Reference = new VariableReferenceExpression(Name);
+        }
+
+        public override string GetStringRepresentation() => $"{Reference.GetStringRepresentation()} = {BaseExpression.GetStringRepresentation()}";
+    }
+}
\ No newline at end of file
diff --git a/WasmLib/Decompilation/SourceCode/Expression.cs b/WasmLib/Decompilation/SourceCode/Expression.cs
new file mode 100644
index 0000000..052afb6
--- /dev/null
+++ b/WasmLib/Decompilation/SourceCode/Expression.cs
@@ -0,0 +1,11 @@
+namespace WasmLib.Decompilation.SourceCode
+{
+    /// <summary>Base class for expressions that can render themselves as text.</summary>
+    public abstract class Expression
+    {
+        /// <summary>Returns the textual form of this expression.</summary>
+        public abstract string GetStringRepresentation();
+
+        public override string ToString() => GetStringRepresentation();
+    }
+}
\ No newline at end of file
diff --git a/WasmLib/Decompilation/SourceCode/GenericExpression.cs b/WasmLib/Decompilation/SourceCode/GenericExpression.cs
new file mode 100644
index 0000000..09d4b56
--- /dev/null
+++ b/WasmLib/Decompilation/SourceCode/GenericExpression.cs
@@ -0,0 +1,23 @@
+using System.Linq;
+using WasmLib.Decompilation.Intermediate;
+
+namespace WasmLib.Decompilation.SourceCode
+{
+    /// <summary>Renders an instruction on its own, or followed by a parenthesised parameter list.</summary>
+    public class GenericExpression : Expression
+    {
+        public IntermediateInstruction BaseInstruction { get; }
+        public Expression[]? Parameters { get; }
+
+        public GenericExpression(IntermediateInstruction baseInstruction, Expression[]? parameters = null)
+        {
+            BaseInstruction = baseInstruction;
+            Parameters = parameters;
+        }
+
+        // NOTE(review): parameters are printed in reverse; presumably they are collected in stack-pop order - confirm
+        public override string GetStringRepresentation() => Parameters is null
+            ? BaseInstruction.ToString()
+            : $"{BaseInstruction}({string.Join(", ", Parameters.Reverse().Select(x => x.GetStringRepresentation()))})";
+    }
+}
\ No newline at end of file
diff --git a/WasmLib/Decompilation/SourceCode/VariableReferenceExpression.cs b/WasmLib/Decompilation/SourceCode/VariableReferenceExpression.cs
new file mode 100644
index 0000000..37200f8
--- /dev/null
+++ b/WasmLib/Decompilation/SourceCode/VariableReferenceExpression.cs
@@ -0,0 +1,15 @@
+namespace WasmLib.Decompilation.SourceCode
+{
+    /// <summary>Expression that renders as the bare name of a variable.</summary>
+    public class VariableReferenceExpression : Expression
+    {
+        public string Name { get; }
+
+        public VariableReferenceExpression(string name)
+        {
+            Name = name;
+        }
+
+        public override string GetStringRepresentation() => Name;
+    }
+}
\ No newline at end of file