Skip to content

Commit d3feefa

Browse files
authored
chore: Trait Cleanup and PcodeOpRef (#133)
Cleaned up some unused CPA trait items left over from the Residue refactor. Added some doc comments. Compound analyses are still () tuples of analyses, but their inner states are now a CompoundState tuple struct instead of a raw tuple. The raw tuple is nice but prevents things like implementing Display automatically for compound states, so we are trying out our own type. The named tuple is a little grosser, but users mostly don't interact directly with the states anyway? Perhaps there's a nice way to use that type internally but still give the user a () tuple to work with. We inject BRANCH instructions representing follow-through. Changed the SleighContext to inject these branches on construction of an Instruction instead of dynamically on access to an invalid index. This allows returning a reference to these injected operations instead of a value. Re-implemented the unwinding analysis as a generic wrapper around other analyses. Added a PcodeOpRef type that wraps a Cow<PcodeOperation>; made all CPA/CFG traits use this ref type instead. This allows many types to pass ops by reference, which should involve much less cloning/allocation in many workloads.
1 parent 3379254 commit d3feefa

File tree

26 files changed

+1171
-1060
lines changed

26 files changed

+1171
-1060
lines changed

jingle/examples/stack_offset.rs

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,6 @@ fn main() {
100100
VarnodeValue::Entry(_) => " [stack: Entry (0)]".to_string(),
101101
VarnodeValue::Offset(_, off) => format!(" [stack: {:+}]", off),
102102
VarnodeValue::Const(c) => format!(" [stack: const 0x{:x}]", c),
103-
VarnodeValue::Bottom => " [stack: bottom]".to_string(),
104103
_ => " [stack: unknown]".to_string(),
105104
})
106105
.unwrap_or_default();
@@ -161,7 +160,6 @@ fn main() {
161160
VarnodeValue::Entry(_) => " [stack: Entry (0)]".to_string(),
162161
VarnodeValue::Offset(_, off) => format!(" [stack: {:+}]", off),
163162
VarnodeValue::Const(c) => format!(" [stack: const 0x{:x}]", c),
164-
VarnodeValue::Bottom => " [stack: bottom]".to_string(),
165163
_ => " [stack: unknown]".to_string(),
166164
})
167165
.unwrap_or_default();

jingle/examples/unwind.rs

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
#![allow(unused)]
2+
3+
use jingle::analysis::cpa::RunnableConfigurableProgramAnalysis;
4+
use jingle::analysis::cpa::reducer::CfgReducer;
5+
use jingle::analysis::cpa::residue::Residue;
6+
use jingle::analysis::cpa::state::LocationState;
7+
use jingle::analysis::direct_location::{CallBehavior, DirectLocationAnalysis};
8+
use jingle::analysis::unwinding::BoundedBackEdgeVisitAnalysis;
9+
use jingle::analysis::unwinding2::UnwindExt;
10+
use jingle::analysis::{Analysis, RunnableAnalysis};
11+
use jingle::modeling::machine::cpu::concrete::ConcretePcodeAddress;
12+
use jingle_sleigh::context::image::gimli::load_with_gimli;
13+
use petgraph::dot::Dot;
14+
use std::{env, fs};
15+
16+
/// Addresses of various test functions in the example binary.
17+
const FUNC_LINE: u64 = 0x100000460;
18+
const FUNC_BRANCH: u64 = 0x100000480;
19+
const FUNC_SWITCH: u64 = 0x1000004a0;
20+
const FUNC_LOOP: u64 = 0x100000548;
21+
const FUNC_NESTED: u64 = 0x100000588;
22+
const FUNC_GOTO: u64 = 0x100000610;
23+
24+
fn main() {
25+
// Initialize tracing for debug output
26+
tracing_subscriber::fmt()
27+
.with_max_level(tracing::Level::INFO)
28+
.with_target(false)
29+
.with_thread_ids(false)
30+
.with_line_number(true)
31+
.init();
32+
33+
tracing::info!("Starting unwinding analysis with back-edge visit counting");
34+
35+
// Load binary via gimli-backed image context
36+
let bin_path = env::home_dir()
37+
.unwrap()
38+
.join("Documents/test_funcs/build/example");
39+
let loaded = load_with_gimli(bin_path, "/Applications/ghidra").unwrap();
40+
41+
tracing::info!("Binary loaded successfully");
42+
43+
// Run unwinding analysis - back-edges are computed internally
44+
tracing::info!("Running unwinding analysis with bounded back-edge visit counting");
45+
46+
let location_analysis = DirectLocationAnalysis::new(CallBehavior::Branch).unwind(5);
47+
48+
// Wrap with CfgReducer
49+
let mut analysis_with_cfg = location_analysis.with_residue(CfgReducer::new());
50+
51+
// Run the unwinding analysis
52+
let cfg = analysis_with_cfg.run(&loaded, ConcretePcodeAddress::from(FUNC_NESTED));
53+
54+
// Print results
55+
println!("\nUnwinding Analysis Results:");
56+
println!("===========================\n");
57+
58+
println!("CFG nodes (unwound states): {}", cfg.nodes().count());
59+
60+
let mut locations: Vec<_> = cfg.nodes().filter_map(|n| n.get_location()).collect();
61+
locations.sort();
62+
locations.dedup();
63+
64+
println!("Unique program locations: {}", locations.len());
65+
for loc in &locations {
66+
let count = cfg
67+
.nodes()
68+
.filter(|n| n.get_location() == Some(*loc))
69+
.count();
70+
println!(" 0x{:x} (visited {} times)", loc, count);
71+
}
72+
fs::write("dot.dot", format!("{:x}", Dot::new(cfg.graph())));
73+
println!(
74+
"\nTotal CFG nodes with unwinding: {}",
75+
cfg.graph().node_count()
76+
);
77+
78+
tracing::info!("Analysis complete");
79+
}

jingle/src/analysis/back_edge/mod.rs

Lines changed: 80 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,9 @@
11
use crate::analysis::Analysis;
22
use crate::analysis::cpa::lattice::JoinSemiLattice;
33
use crate::analysis::cpa::lattice::pcode::PcodeAddressLattice;
4-
use crate::analysis::cpa::residue::EmptyResidue;
4+
use crate::analysis::cpa::residue::Residue;
55
use crate::analysis::cpa::state::{AbstractState, LocationState, MergeOutcome, Successor};
66
use crate::analysis::cpa::{ConfigurableProgramAnalysis, IntoState};
7-
use crate::analysis::pcode_store::PcodeStore;
87
use crate::modeling::machine::cpu::concrete::ConcretePcodeAddress;
98
use jingle_sleigh::PcodeOperation;
109
use std::borrow::Borrow;
@@ -123,7 +122,10 @@ impl AbstractState for BackEdgeState {
123122
}
124123

125124
impl LocationState for BackEdgeState {
126-
fn get_operation<T: PcodeStore>(&self, t: &T) -> Option<PcodeOperation> {
125+
fn get_operation<'a, T: crate::analysis::pcode_store::PcodeStore + ?Sized>(
126+
&'a self,
127+
t: &'a T,
128+
) -> Option<crate::analysis::pcode_store::PcodeOpRef<'a>> {
127129
self.location.get_operation(t)
128130
}
129131

@@ -132,32 +134,90 @@ impl LocationState for BackEdgeState {
132134
}
133135
}
134136

135-
pub struct BackEdgeCPA {
136-
pub back_edges: Vec<(PcodeAddressLattice, PcodeAddressLattice)>,
137+
/// A reducer that identifies back-edges during the analysis.
138+
///
139+
/// A back-edge is an edge from a state to a previously visited state in its path.
140+
/// This reducer tracks all visited edges and identifies when a transition creates
141+
/// a back-edge by checking if the destination location appears in the source state's
142+
/// visited path.
143+
pub struct BackEdgeReducer {
144+
/// All visited edges (from_location, to_location)
145+
visited_edges: Vec<(ConcretePcodeAddress, ConcretePcodeAddress)>,
146+
/// Identified back-edges
147+
back_edges: BackEdges,
137148
}
138149

139-
impl Default for BackEdgeCPA {
140-
fn default() -> Self {
141-
Self::new()
150+
impl BackEdgeReducer {
151+
pub fn new() -> Self {
152+
Self {
153+
visited_edges: Vec::new(),
154+
back_edges: BackEdges::default(),
155+
}
142156
}
143-
}
144157

145-
impl BackEdgeCPA {
146-
pub fn new() -> Self {
158+
pub fn new_with_capacity(cap: usize) -> Self {
147159
Self {
148-
back_edges: Vec::new(),
160+
visited_edges: Vec::with_capacity(cap),
161+
back_edges: BackEdges::default(),
149162
}
150163
}
164+
}
165+
166+
impl Default for BackEdgeReducer {
167+
fn default() -> Self {
168+
Self::new()
169+
}
170+
}
171+
172+
impl Residue<BackEdgeState> for BackEdgeReducer {
173+
type Output = BackEdges;
151174

152-
/// Extract the computed back edges into a BackEdges structure
153-
pub fn get_back_edges(&self) -> BackEdges {
154-
let mut b = BackEdges::default();
155-
for (from, to) in &self.back_edges {
156-
if let (PcodeAddressLattice::Const(from), PcodeAddressLattice::Const(to)) = (from, to) {
157-
b.add(*from, *to);
175+
/// Track a state transition and identify if it's a back-edge.
176+
///
177+
/// A back-edge occurs when we transition from a state to a destination
178+
/// that appears in the source state's visited path.
179+
fn new_state(
180+
&mut self,
181+
state: &BackEdgeState,
182+
dest_state: &BackEdgeState,
183+
_op: &Option<crate::analysis::pcode_store::PcodeOpRef<'_>>,
184+
) {
185+
// Extract concrete addresses from both states
186+
if let (Some(from_addr), Some(to_addr)) = (state.get_location(), dest_state.get_location())
187+
{
188+
// Record this edge
189+
if !self.visited_edges.contains(&(from_addr, to_addr)) {
190+
self.visited_edges.push((from_addr, to_addr));
191+
}
192+
193+
// Check if this is a back-edge:
194+
// The destination is a back-edge if it appears in the source state's visited path
195+
if state.path_visits.contains(&dest_state.location) {
196+
self.back_edges.add(from_addr, to_addr);
158197
}
159198
}
160-
b
199+
}
200+
201+
fn new() -> Self {
202+
Self::new()
203+
}
204+
205+
fn finalize(self) -> Self::Output {
206+
self.back_edges
207+
}
208+
}
209+
210+
pub struct BackEdgeCPA;
211+
212+
impl Default for BackEdgeCPA {
213+
fn default() -> Self {
214+
Self::new()
215+
}
216+
}
217+
218+
impl BackEdgeCPA {
219+
pub fn new() -> Self {
220+
Self
161221
}
162222

163223
/// Inherent constructor for the analysis initial state.
@@ -174,20 +234,7 @@ impl BackEdgeCPA {
174234

175235
impl ConfigurableProgramAnalysis for BackEdgeCPA {
176236
type State = BackEdgeState;
177-
type Reducer = EmptyResidue<Self::State>;
178-
179-
fn residue(
180-
&mut self,
181-
old_state: &Self::State,
182-
new_state: &Self::State,
183-
_op: &Option<PcodeOperation>,
184-
) {
185-
if old_state.path_visits.contains(&new_state.location) {
186-
// Clone the locations since `old_state` and `new_state` are borrowed here.
187-
self.back_edges
188-
.push((old_state.location.clone(), new_state.location.clone()))
189-
}
190-
}
237+
type Reducer = BackEdgeReducer;
191238
}
192239

193240
impl Analysis for BackEdgeCPA {}

jingle/src/analysis/cfg/mod.rs

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,14 +9,19 @@ use petgraph::visit::EdgeRef;
99
use std::borrow::Borrow;
1010
use std::cell::RefCell;
1111
use std::collections::{HashMap, HashSet};
12-
use std::fmt::{Formatter, LowerHex};
12+
use std::fmt::{Display, Formatter, LowerHex};
1313
use std::rc::Rc;
1414

1515
pub(crate) mod model;
1616

1717
#[derive(Debug, Default, Copy, Clone, Hash)]
1818
pub struct EmptyEdge;
1919

20+
impl Display for EmptyEdge {
21+
fn fmt(&self, _f: &mut Formatter<'_>) -> std::fmt::Result {
22+
Ok(())
23+
}
24+
}
2025
impl LowerHex for EmptyEdge {
2126
fn fmt(&self, _: &mut Formatter<'_>) -> std::fmt::Result {
2227
Ok(())
@@ -277,9 +282,13 @@ impl<N: CfgState, D: ModelTransition<N::Model>> PcodeCfg<N, D> {
277282
}
278283

279284
impl PcodeStore for PcodeCfg<ConcretePcodeAddress, PcodeOperation> {
280-
fn get_pcode_op_at<T: Borrow<ConcretePcodeAddress>>(&self, addr: T) -> Option<PcodeOperation> {
285+
fn get_pcode_op_at<'a, T: Borrow<ConcretePcodeAddress>>(
286+
&'a self,
287+
addr: T,
288+
) -> Option<crate::analysis::pcode_store::PcodeOpRef<'a>> {
281289
let addr = *addr.borrow();
282-
self.get_op_at(addr).cloned()
290+
self.get_op_at(addr)
291+
.map(crate::analysis::pcode_store::PcodeOpRef::from)
283292
}
284293
}
285294

jingle/src/analysis/cfg/model.rs

Lines changed: 2 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
use crate::JingleError;
2+
use crate::analysis::compound::CompoundState;
23
use crate::analysis::cpa::lattice::flat::FlatLattice;
34
use crate::analysis::cpa::state::StateDisplay;
45
use crate::modeling::machine::MachineState;
@@ -139,7 +140,7 @@ impl<'a, S: StateDisplay> std::fmt::Display for StateDisplayWrapper<'a, S> {
139140
self.0.fmt_state(f)
140141
}
141142
}
142-
impl<A: CfgState, B: StateDisplay + Clone + Debug + Hash + Eq> CfgState for (A, B) {
143+
impl<A: CfgState, B: StateDisplay + Clone + Debug + Hash + Eq> CfgState for CompoundState<A, B> {
143144
type Model = A::Model;
144145

145146
fn new_const(&self, i: &SleighArchInfo) -> Self::Model {
@@ -156,59 +157,3 @@ impl<A: CfgState, B: StateDisplay + Clone + Debug + Hash + Eq> CfgState for (A,
156157
self.0.location()
157158
}
158159
}
159-
160-
impl<
161-
A: CfgState,
162-
B: StateDisplay + Clone + Debug + Hash + Eq,
163-
C: StateDisplay + Clone + Debug + Hash + Eq,
164-
> CfgState for (A, B, C)
165-
{
166-
type Model = A::Model;
167-
168-
fn new_const(&self, i: &SleighArchInfo) -> Self::Model {
169-
self.0.new_const(i)
170-
}
171-
172-
fn model_id(&self) -> String {
173-
// Include display outputs from the second and third elements.
174-
format!(
175-
"{}_{}_{}",
176-
self.0.model_id(),
177-
StateDisplayWrapper(&self.1),
178-
StateDisplayWrapper(&self.2)
179-
)
180-
}
181-
182-
fn location(&self) -> Option<ConcretePcodeAddress> {
183-
self.0.location()
184-
}
185-
}
186-
187-
impl<
188-
A: CfgState,
189-
B: StateDisplay + Clone + Debug + Hash + Eq,
190-
C: StateDisplay + Clone + Debug + Hash + Eq,
191-
D: StateDisplay + Clone + Debug + Hash + Eq,
192-
> CfgState for (A, B, C, D)
193-
{
194-
type Model = A::Model;
195-
196-
fn new_const(&self, i: &SleighArchInfo) -> Self::Model {
197-
self.0.new_const(i)
198-
}
199-
200-
fn model_id(&self) -> String {
201-
// Include display outputs from elements 2, 3 and 4.
202-
format!(
203-
"{}_{}_{}_{}",
204-
self.0.model_id(),
205-
StateDisplayWrapper(&self.1),
206-
StateDisplayWrapper(&self.2),
207-
StateDisplayWrapper(&self.3)
208-
)
209-
}
210-
211-
fn location(&self) -> Option<ConcretePcodeAddress> {
212-
self.0.location()
213-
}
214-
}

0 commit comments

Comments
 (0)