Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 42 additions & 0 deletions decompiler/pipeline/ssa/conditional_out_of_SSA.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
from typing import DefaultDict, List

from decompiler.pipeline.ssa.metric_helper import MetricHelper
from decompiler.pipeline.ssa.phi_dependency_resolver import PhiDependencyResolver
from decompiler.pipeline.ssa.phi_lifting import PhiFunctionLifter
from decompiler.pipeline.ssa.variable_renaming import ConditionalVariableRenamer
from decompiler.structures.graphs.cfg import BasicBlock
from decompiler.structures.interferencegraph import InterferenceGraph
from decompiler.structures.pseudo.instructions import Phi
from decompiler.task import DecompilerTask


class ConditionalOutOfSSA:
    """
    Out-of-SSA step that chains the Phi dependency resolver, the Phi-function lifter and the
    ConditionalVariableRenamer, handing the configured edge weights to the renamer.
    """

    def __init__(
        self,
        task: DecompilerTask,
        _phi_fuctions_of: DefaultDict[BasicBlock, List[Phi]],
        strong: float = 0.94606,
        mid: float = 0.332811,
        weak: float = 0.410742,
        strategy: int = 3,
    ):
        """
        Store the configuration; no work is done until perform() is called.

        :param task: the decompiler task; its cfg (and graph) attributes are read.
        :param _phi_fuctions_of: mapping from basic block to the Phi-functions of that block
            (the misspelled parameter name is kept so that existing keyword callers keep working).
        :param strong: value for the strong dependency edges.
        :param mid: value for the mid dependency edges.
        :param weak: value for the weak dependency edges.
        :param strategy: forwarded unchanged to ConditionalVariableRenamer.
        """
        self.task = task
        self.cfg = task.cfg
        self.strongDep = strong
        self.midDep = mid
        self.weakDep = weak
        self._phi_functions_of = _phi_fuctions_of
        self.strategy = strategy
        # Both attributes are populated by perform(); they are declared here so that reading them
        # before perform() yields None instead of raising AttributeError.
        self._metric_helper = None
        self.interference_graph = None

    def perform(self):
        """
        Run the out-of-SSA steps in order: build the metric helper, resolve the Phi dependencies,
        build the interference graph, lift the Phi-functions and finally rename the variables.
        """
        self._metric_helper = MetricHelper(self.task.cfg)
        PhiDependencyResolver(self._phi_functions_of).resolve()
        # The interference graph is built only after the Phi dependencies have been resolved.
        self.interference_graph = InterferenceGraph(self.task.cfg)
        PhiFunctionLifter(self.task.graph, self.interference_graph, self._phi_functions_of).lift()
        ConditionalVariableRenamer(
            self.task,
            self.interference_graph,
            self._metric_helper,
            self.strongDep,
            self.midDep,
            self.weakDep,
            self.strategy,
        ).rename()
121 changes: 76 additions & 45 deletions decompiler/pipeline/ssa/dependency_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,15 @@
from typing import Iterator

import networkx
import networkx as nx
from decompiler.pipeline.ssa.metric_helper import MetricHelper
from decompiler.structures.graphs.cfg import ControlFlowGraph
from decompiler.structures.interferencegraph import InterferenceGraph
from decompiler.structures.pseudo import Expression, Operation, OperationType
from decompiler.structures.pseudo.expressions import Variable
from decompiler.structures.pseudo import Call, Expression, ListOperation, Operation, OperationType, TernaryExpression, UnaryOperation
from decompiler.structures.pseudo.expressions import Constant, GlobalVariable, NotUseableConstant, Symbol, Variable
from decompiler.structures.pseudo.instructions import Assignment
from decompiler.util.decoration import DecoratedGraph
from networkx import MultiDiGraph

# Multiplicative constant applied to dependency scores when encountering operations, to penalize too much nesting.
OPERATION_PENALTY = 0.9
from networkx import MultiDiGraph, MultiGraph, to_undirected


def decorate_dependency_graph(dependency_graph: MultiDiGraph, interference_graph: InterferenceGraph) -> DecoratedGraph:
Expand All @@ -37,33 +36,42 @@ def decorate_dependency_graph(dependency_graph: MultiDiGraph, interference_graph
return DecoratedGraph(decorated_graph)


def dependency_graph_from_cfg(
    cfg: ControlFlowGraph, strong: float, mid: float, weak: float, ifg: InterferenceGraph, metric_helper: MetricHelper
) -> MultiGraph:
    """
    Construct the undirected dependency graph of the given CFG.

    - Every variable yielded by _collect_variables becomes a node, stored as a one-element tuple.
    - For every assignment, each defined variable is connected to each used variable whose dependency score is
      positive, unless ifg.are_interfering reports interference for the defined variables together with the used one.
    - The score of a dependency is stored in the edge attribute "a"; parallel edges are possible (MultiGraph).

    :param cfg: the control flow graph whose assignments are analysed.
    :param strong: score handed to _expression_dependencies for strong dependencies.
    :param mid: score handed to _expression_dependencies for mid dependencies.
    :param weak: score handed to _expression_dependencies for weak dependencies.
    :param ifg: interference graph used to reject edges between interfering variables.
    :param metric_helper: currently not used by this function; kept so the signature stays stable for callers.
    :return: the dependency graph.
    """
    dependency_graph = MultiGraph()
    for variable in _collect_variables(cfg):
        dependency_graph.add_node((variable,))
    for instruction in _assignments_in_cfg(cfg):
        defined_variables = instruction.definitions
        for used_variable, score in _expression_dependencies(instruction.value, strong, mid, weak).items():
            if score > 0 and not ifg.are_interfering(*defined_variables, used_variable):
                for dvar in defined_variables:
                    dependency_graph.add_edge((dvar,), (used_variable,), a=score)
    return dependency_graph


#def foo(a, b, graph: MetricHelper):
# return graph.vars_are_connected_strongly(a, b)


def _collect_variables(cfg: ControlFlowGraph) -> Iterator[Variable]:
    """
    Lazily iterate over the variables of the given control flow graph.

    Every subexpression of every instruction is inspected; those that are Variable instances (and not
    UnaryOperation instances) are yielded, once per occurrence.
    """
    for instruction in cfg.instructions:
        yield from (
            candidate
            for candidate in instruction.subexpressions()
            if isinstance(candidate, Variable) and not isinstance(candidate, UnaryOperation)
        )


Expand All @@ -74,41 +82,64 @@ def _assignments_in_cfg(cfg: ControlFlowGraph) -> Iterator[Assignment]:
yield instr


def _expression_dependencies(expression: Expression) -> dict[Variable, float]:
def _get_base_operands(expression: list[Expression]) -> tuple[list, bool]:
    """
    Collect the variables and constants that the given expressions are built from.

    The expressions are taken apart with an explicit work list:
    - variables (including global variables) and constants that are neither Symbol, NotUseableConstant nor
      GlobalVariable are collected,
    - operands of casts and of operations that are not ListOperation, UnaryOperation, Call or TernaryExpression
      are descended into,
    - operands of the remaining unary operations and the parameters of calls are descended into as well, but
      mark the result as "low",
    - expressions matching none of these cases are ignored.

    :param expression: the expressions to take apart; the list itself is not modified.
    :return: a tuple (operands, low) where operands holds each collected operand once, in the order of first
        discovery, and low tells whether a low-marking operation or a call was traversed.
    """
    is_low = False
    parts = []
    remains = list(expression)

    while remains:
        exp = remains.pop()

        if isinstance(exp, (GlobalVariable, Variable)):
            parts.append(exp)
        elif isinstance(exp, Constant) and not isinstance(exp, (Symbol, NotUseableConstant, GlobalVariable)):
            parts.append(exp)
        elif isinstance(exp, Operation) and (
            not isinstance(exp, (ListOperation, UnaryOperation, Call, TernaryExpression))
            or (isinstance(exp, UnaryOperation) and exp.operation == OperationType.cast)
        ):
            remains.extend(exp.operands)
        elif isinstance(exp, Operation) and (
            # NOTE(review): this first test already accepts every operation that is not a ListOperation, Call or
            # TernaryExpression, so the dereference/address/pointer test below only matters for an object that is
            # one of those AND a UnaryOperation - confirm whether other unary operations were meant to be excluded.
            not isinstance(exp, (ListOperation, Call, TernaryExpression))
            or (
                isinstance(exp, UnaryOperation)
                and (
                    exp.operation == OperationType.dereference
                    or exp.operation == OperationType.address
                    or exp.operation == OperationType.pointer
                )
            )
        ):
            remains.extend(exp.operands)
            is_low = True
        elif isinstance(exp, Call):
            remains.extend(exp.parameters)
            is_low = True

    # dict.fromkeys removes duplicates like set() does, but keeps the discovery order, so the result does not
    # depend on hash ordering.
    return list(dict.fromkeys(parts)), is_low


def _expression_dependencies(expression: Expression, strong: float, mid: float, weak: float) -> dict[Variable, float]:
    """
    Calculate the dependencies of an expression in terms of its constituent variables.

    The expression is reduced to its base operands via _get_base_operands and every variable among them is mapped
    to one of the given scores:
    - exactly one base operand and it is a variable: strong, or weak if the operand was reached through a
      low-marking operation or a call,
    - several base operands of which exactly one is a variable, and nothing low-marking was traversed: mid,
    - several base operands otherwise: weak for every variable among them,
    - no base operand, or a single one that is not a variable: no dependencies.

    :param expression: the expression to analyse.
    :param strong: score for a strong dependency.
    :param mid: score for a mid dependency.
    :param weak: score for a weak dependency.
    :return: mapping from each variable the expression depends on to its score.
    """
    base_operands, low = _get_base_operands([expression])
    variables = [operand for operand in base_operands if isinstance(operand, Variable)]

    if len(base_operands) == 1:
        if not variables:
            # The only base operand is not a variable (e.g. a constant).
            return {}
        return {variables[0]: weak if low else strong}
    if len(base_operands) > 1:
        if len(variables) == 1 and not low:
            return {variables[0]: mid}
        return {variable: weak for variable in variables}
    return {}
Loading
Loading