Skip to content

Commit 7a41ff2

Browse files
committed
feat(decompile): abi, solidity, and yul analyzers implemented
1 parent 74e572b commit 7a41ff2

File tree

6 files changed

+365
-276
lines changed

6 files changed

+365
-276
lines changed

crates/decompile/src/core/analyze.rs

+9-2
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ use crate::{
2828

2929
/// The type of analyzer to use. This will determine which heuristics are used when analyzing a
3030
/// [`VMTrace`] generated by symbolic execution.
31+
#[derive(Debug, Clone, Eq, PartialEq)]
3132
pub enum AnalyzerType {
3233
/// Analyze the trace using Solidity heuristics, which will generate high-level Solidity code
3334
Solidity,
@@ -61,12 +62,14 @@ impl Display for AnalyzerType {
6162
}
6263

6364
/// State shared between heuristics
64-
#[derive(Default, Debug, Clone)]
65+
#[derive(Debug, Clone)]
6566
pub(crate) struct AnalyzerState {
6667
/// If we reach a JUMPI, this will hold the conditional for scope tracking
6768
pub jumped_conditional: Option<String>,
6869
/// Tracks a stack of conditionals, used for scope tracking
6970
pub conditional_stack: Vec<String>,
71+
/// Tracks which analyzer type we are using
72+
pub analyzer_type: AnalyzerType,
7073
}
7174

7275
/// The analyzer, which will analyze a [`VMTrace`] generated by symbolic execution and build an
@@ -142,7 +145,11 @@ impl Analyzer {
142145
/// Inner analysis implementation
143146
fn analyze_inner(&mut self, branch: &VMTrace) -> Result<(), Error> {
144147
// get the analyzer state
145-
let mut analyzer_state = AnalyzerState::default();
148+
let mut analyzer_state = AnalyzerState {
149+
jumped_conditional: None,
150+
conditional_stack: Vec::new(),
151+
analyzer_type: self.typ.clone(),
152+
};
146153

147154
// for each operation in the current trace branch, perform analysis with registered
148155
// heuristics

crates/decompile/src/core/mod.rs

+3-1
Original file line numberDiff line numberDiff line change
@@ -177,7 +177,7 @@ pub async fn decompile(args: DecompilerArgs) -> Result<DecompileResult, Error> {
177177

178178
// analyze the symbolic execution trace
179179
let analyzed_function = analyzer.analyze()?;
180-
println!("{:#?}", analyzed_function.arguments);
180+
println!("{:#?}", analyzed_function.logic);
181181

182182
Ok::<_, Error>(analyzed_function)
183183
})
@@ -186,6 +186,7 @@ pub async fn decompile(args: DecompilerArgs) -> Result<DecompileResult, Error> {
186186
debug!("analyzing symbolic execution results took {:?}", start_analysis_time.elapsed());
187187
info!("analyzed {} symbolic execution traces", analyzed_functions.len());
188188

189+
// resolve event and error selectors
189190
if !args.skip_resolving {
190191
// resolve error selectors
191192
let start_error_resolving_time = Instant::now();
@@ -252,6 +253,7 @@ pub async fn decompile(args: DecompilerArgs) -> Result<DecompileResult, Error> {
252253
all_resolved_events.extend(resolved_events);
253254
}
254255

256+
// match analyzed parameters with resolved signatures for each function
255257
analyzed_functions.iter_mut().for_each(|f| {
256258
let resolve_function_signatures =
257259
resolved_selectors.get(&f.selector).unwrap_or(&Vec::new()).to_owned();

crates/decompile/src/core/out/abi.rs

-3
Original file line numberDiff line numberDiff line change
@@ -136,9 +136,6 @@ pub fn build_abi(
136136
abi.functions.insert(name, vec![function]);
137137
});
138138

139-
// pretty print abi
140-
println!("{}", serde_json::to_string_pretty(&abi).expect("failed to serialize abi"));
141-
142139
debug!("constructing abi took {:?}", start_time.elapsed());
143140

144141
Ok(abi)

crates/decompile/src/utils/heuristics/arguments.rs

+85-3
Original file line numberDiff line numberDiff line change
@@ -2,19 +2,23 @@ use std::collections::HashSet;
22

33
use ethers::types::U256;
44

5-
use heimdall_common::ether::evm::core::{types::convert_bitmask, vm::State};
5+
use heimdall_common::ether::evm::core::{
6+
types::{byte_size_to_type, convert_bitmask},
7+
vm::State,
8+
};
69
use tracing::{debug, trace};
710

811
use crate::{
9-
core::analyze::AnalyzerState,
12+
core::analyze::{AnalyzerState, AnalyzerType},
1013
interfaces::{AnalyzedFunction, CalldataFrame, TypeHeuristic},
14+
utils::constants::{AND_BITMASK_REGEX, AND_BITMASK_REGEX_2},
1115
Error,
1216
};
1317

1418
pub fn argument_heuristic(
1519
function: &mut AnalyzedFunction,
1620
state: &State,
17-
_: &mut AnalyzerState,
21+
analyzer_state: &mut AnalyzerState,
1822
) -> Result<(), Error> {
1923
match state.last_instruction.opcode {
2024
// CALLDATALOAD
@@ -76,6 +80,84 @@ pub fn argument_heuristic(
7680
}
7781
}
7882

83+
// RETURN
84+
0xf3 => {
85+
// Safely convert U256 to usize
86+
let size: usize = state.last_instruction.inputs[1].try_into().unwrap_or(0);
87+
88+
let return_memory_operations = function.get_memory_range(
89+
state.last_instruction.inputs[0],
90+
state.last_instruction.inputs[1],
91+
);
92+
let return_memory_operations_solidified = return_memory_operations
93+
.iter()
94+
.map(|x| x.operations.solidify())
95+
.collect::<Vec<String>>()
96+
.join(", ");
97+
98+
// add the return statement to the function logic
99+
if analyzer_state.analyzer_type == AnalyzerType::Solidity {
100+
if return_memory_operations.len() <= 1 {
101+
function.logic.push(format!("return {return_memory_operations_solidified};"));
102+
} else {
103+
function.logic.push(format!(
104+
"return abi.encodePacked({return_memory_operations_solidified});"
105+
));
106+
}
107+
} else if analyzer_state.analyzer_type == AnalyzerType::Yul {
108+
function.logic.push(format!(
109+
"return({}, {})",
110+
state.last_instruction.input_operations[0].yulify(),
111+
state.last_instruction.input_operations[1].yulify()
112+
));
113+
}
114+
115+
// if we've already determined a return type, we don't want to do it again.
116+
// we use bytes32 as a default return type
117+
if function.returns != Some(String::from("bytes32")) {
118+
return Ok(());
119+
}
120+
121+
// if any input op is ISZERO(x), this is a boolean return
122+
if return_memory_operations.iter().any(|x| x.operations.opcode.name == "ISZERO") {
123+
function.returns = Some(String::from("bool"));
124+
}
125+
// if the size of returndata is > 32, it must be a bytes memory return.
126+
// it could be a struct, but we can't really determine that from the bytecode
127+
else if size > 32 {
128+
function.returns = Some(String::from("bytes memory"));
129+
} else {
130+
// attempt to find a return type within the return memory operations
131+
let byte_size = match AND_BITMASK_REGEX
132+
.find(&return_memory_operations_solidified)
133+
.ok()
134+
.flatten()
135+
{
136+
Some(bitmask) => {
137+
let cast = bitmask.as_str();
138+
139+
cast.matches("ff").count()
140+
}
141+
None => match AND_BITMASK_REGEX_2
142+
.find(&return_memory_operations_solidified)
143+
.ok()
144+
.flatten()
145+
{
146+
Some(bitmask) => {
147+
let cast = bitmask.as_str();
148+
149+
cast.matches("ff").count()
150+
}
151+
None => 32,
152+
},
153+
};
154+
155+
// convert the cast size to a string
156+
let (_, cast_types) = byte_size_to_type(byte_size);
157+
function.returns = Some(cast_types[0].to_string());
158+
}
159+
}
160+
79161
// integer type heuristics
80162
0x02 | 0x04 | 0x05 | 0x06 | 0x07 | 0x08 | 0x09 | 0x0b | 0x10 | 0x11 | 0x12 | 0x13 => {
81163
// check if this instruction is operating on a known argument.

0 commit comments

Comments
 (0)