Skip to content

Commit 1803236

Browse files
committed
fix: support runtime DWARF endianness
Load DWARF sections using the endianness reported by the object file at runtime, and carry that same reader through call-site, CFI, and expression parsing so that big-endian DWARF is decoded correctly. Add targeted big-endian regression tests for DW_AT_call_target expressions and DW_OP_addr decoding.
1 parent cdbf3fe commit 1803236

8 files changed

Lines changed: 243 additions & 53 deletions

File tree

ghostscope-dwarf/src/binary/mapped_file.rs

Lines changed: 32 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@ use std::ops::Deref;
22
use std::path::{Path, PathBuf};
33
use std::sync::Arc;
44

5+
use object::{Endianness, Object};
6+
57
/// Internal helper for mmap-backed file access and object parsing.
68
#[derive(Debug)]
79
pub(crate) struct MappedFile {
@@ -29,19 +31,24 @@ impl MappedFile {
2931
}
3032

3133
/// Create a DWARF reader over the whole mapped file.
32-
pub fn dwarf_reader(file: Arc<Self>) -> DwarfReader {
33-
DwarfReader::new(DwarfBytes::Mapped(file), gimli::LittleEndian)
34+
pub fn dwarf_reader(file: Arc<Self>, endian: DwarfEndian) -> DwarfReader {
35+
DwarfReader::new(DwarfBytes::Mapped(file), endian)
3436
}
3537

3638
/// Create a DWARF reader over a file range without copying the mapped data.
37-
pub fn dwarf_reader_range(file: Arc<Self>, start: u64, size: u64) -> Option<DwarfReader> {
39+
pub fn dwarf_reader_range(
40+
file: Arc<Self>,
41+
start: u64,
42+
size: u64,
43+
endian: DwarfEndian,
44+
) -> Option<DwarfReader> {
3845
let start = usize::try_from(start).ok()?;
3946
let size = usize::try_from(size).ok()?;
4047
let end = start.checked_add(size)?;
4148
if end > file.as_bytes().len() {
4249
return None;
4350
}
44-
Some(Self::dwarf_reader(file).range(start..end))
51+
Some(Self::dwarf_reader(file, endian).range(start..end))
4552
}
4653
}
4754

@@ -66,13 +73,30 @@ impl Deref for DwarfBytes {
6673
unsafe impl gimli::StableDeref for DwarfBytes {}
6774
unsafe impl gimli::CloneStableDeref for DwarfBytes {}
6875

69-
pub(crate) type DwarfReader = gimli::EndianReader<gimli::LittleEndian, DwarfBytes>;
76+
pub(crate) type DwarfEndian = gimli::RunTimeEndian;
77+
pub(crate) type DwarfReader = gimli::EndianReader<DwarfEndian, DwarfBytes>;
7078
pub(crate) type DwarfData = gimli::Dwarf<DwarfReader>;
7179

80+
#[cfg(test)]
7281
pub(crate) fn dwarf_reader_from_arc(bytes: Arc<[u8]>) -> DwarfReader {
73-
DwarfReader::new(DwarfBytes::Owned(bytes), gimli::LittleEndian)
82+
dwarf_reader_from_arc_with_endian(bytes, gimli::RunTimeEndian::Little)
83+
}
84+
85+
#[cfg(test)]
86+
pub(crate) fn dwarf_reader_from_arc_with_endian(
87+
bytes: Arc<[u8]>,
88+
endian: DwarfEndian,
89+
) -> DwarfReader {
90+
DwarfReader::new(DwarfBytes::Owned(bytes), endian)
7491
}
7592

76-
pub(crate) fn empty_dwarf_reader() -> DwarfReader {
77-
dwarf_reader_from_arc(Arc::<[u8]>::from(&[][..]))
93+
pub(crate) fn empty_dwarf_reader_with_endian(endian: DwarfEndian) -> DwarfReader {
94+
DwarfReader::new(DwarfBytes::Owned(Arc::<[u8]>::from(&[][..])), endian)
95+
}
96+
97+
pub(crate) fn dwarf_endian_from_object(object: &object::File<'_>) -> DwarfEndian {
98+
match object.endianness() {
99+
Endianness::Little => gimli::RunTimeEndian::Little,
100+
Endianness::Big => gimli::RunTimeEndian::Big,
101+
}
78102
}

ghostscope-dwarf/src/binary/mod.rs

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,9 @@ pub(crate) mod debuglink;
22
pub(crate) mod mapped_file;
33

44
pub(crate) use debuglink::try_load_debug_file;
5+
pub(crate) use mapped_file::{
6+
dwarf_endian_from_object, empty_dwarf_reader_with_endian, DwarfData, DwarfEndian, DwarfReader,
7+
MappedFile,
8+
};
59
#[cfg(test)]
6-
pub(crate) use mapped_file::dwarf_reader_from_arc;
7-
pub(crate) use mapped_file::{empty_dwarf_reader, DwarfData, DwarfReader, MappedFile};
10+
pub(crate) use mapped_file::{dwarf_reader_from_arc, dwarf_reader_from_arc_with_endian};

ghostscope-dwarf/src/index/block_index.rs

Lines changed: 127 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -554,14 +554,11 @@ impl<'a> BlockIndexBuilder<'a> {
554554
entry: &gimli::DebuggingInformationEntry<DwarfReader>,
555555
) -> Option<u64> {
556556
let attr = entry.attr(gimli::constants::DW_AT_call_target)?;
557-
let gimli::AttributeValue::Exprloc(expr) = attr.value() else {
557+
let gimli::AttributeValue::Exprloc(mut expr) = attr.value() else {
558558
return None;
559559
};
560-
let expr_bytes = expr.0.to_slice().ok()?;
561-
let mut expression =
562-
gimli::Expression(gimli::EndianSlice::new(&expr_bytes, gimli::LittleEndian));
563-
let first = gimli::Operation::parse(&mut expression.0, unit.encoding()).ok()?;
564-
if !expression.0.is_empty() {
560+
let first = gimli::Operation::parse(&mut expr.0, unit.encoding()).ok()?;
561+
if !expr.0.is_empty() {
565562
return None;
566563
}
567564
match first {
@@ -589,14 +586,11 @@ impl<'a> BlockIndexBuilder<'a> {
589586
entry: &gimli::DebuggingInformationEntry<DwarfReader>,
590587
) -> Option<u16> {
591588
let attr = entry.attr(gimli::constants::DW_AT_location)?;
592-
let gimli::AttributeValue::Exprloc(expr) = attr.value() else {
589+
let gimli::AttributeValue::Exprloc(mut expr) = attr.value() else {
593590
return None;
594591
};
595-
let expr_bytes = expr.0.to_slice().ok()?;
596-
let mut expression =
597-
gimli::Expression(gimli::EndianSlice::new(&expr_bytes, gimli::LittleEndian));
598-
let first = gimli::Operation::parse(&mut expression.0, unit.encoding()).ok()?;
599-
if !expression.0.is_empty() {
592+
let first = gimli::Operation::parse(&mut expr.0, unit.encoding()).ok()?;
593+
if !expr.0.is_empty() {
600594
return None;
601595
}
602596
match first {
@@ -623,9 +617,9 @@ impl<'a> BlockIndexBuilder<'a> {
623617
_ => None,
624618
}
625619
})?;
626-
let expr_bytes = expr.0.to_slice().ok()?;
627620
ExpressionEvaluator::parse_expression_to_steps_in_unit(
628-
&expr_bytes,
621+
expr.0.to_slice().ok().as_deref().unwrap_or(&[]),
622+
expr.0.endian(),
629623
unit,
630624
self.dwarf,
631625
return_pc,
@@ -634,17 +628,15 @@ impl<'a> BlockIndexBuilder<'a> {
634628
None,
635629
)
636630
.ok()
637-
.or_else(|| Self::lower_entry_value_call_site_register(unit, &expr_bytes))
631+
.or_else(|| Self::lower_entry_value_call_site_register(unit, expr))
638632
}
639633

640634
fn lower_entry_value_call_site_register(
641635
unit: &gimli::Unit<DwarfReader>,
642-
expr_bytes: &[u8],
636+
mut expr: gimli::Expression<DwarfReader>,
643637
) -> Option<Vec<ComputeStep>> {
644-
let mut expression =
645-
gimli::Expression(gimli::EndianSlice::new(expr_bytes, gimli::LittleEndian));
646-
let first = gimli::Operation::parse(&mut expression.0, unit.encoding()).ok()?;
647-
if !expression.0.is_empty() {
638+
let first = gimli::Operation::parse(&mut expr.0, unit.encoding()).ok()?;
639+
if !expr.0.is_empty() {
648640
return None;
649641
}
650642
let gimli::Operation::EntryValue { expression: inner } = first else {
@@ -681,13 +673,13 @@ impl<'a> BlockIndexBuilder<'a> {
681673
#[cfg(test)]
682674
mod tests {
683675
use super::*;
684-
use crate::binary::dwarf_reader_from_arc;
676+
use crate::binary::{dwarf_reader_from_arc, dwarf_reader_from_arc_with_endian};
685677
use gimli::constants;
686678
use gimli::write::{
687679
Address, AttributeValue as WriteAttributeValue, Dwarf as WriteDwarf, EndianVec,
688680
Expression as WriteExpression, LineProgram, Sections, Unit,
689681
};
690-
use gimli::{Format, LittleEndian, Register};
682+
use gimli::{BigEndian, Format, LittleEndian, Register};
691683
use std::sync::Arc;
692684

693685
fn build_call_site_fixture(
@@ -906,6 +898,95 @@ mod tests {
906898
(read_dwarf, cu_offset)
907899
}
908900

901+
fn build_big_endian_call_target_expr_fixture(
902+
) -> (gimli::Dwarf<DwarfReader>, gimli::DebugInfoOffset) {
903+
let encoding = gimli::Encoding {
904+
format: Format::Dwarf32,
905+
version: 5,
906+
address_size: 8,
907+
};
908+
909+
let mut dwarf = WriteDwarf::new();
910+
let unit_id = dwarf.units.add(Unit::new(encoding, LineProgram::none()));
911+
let unit = dwarf.units.get_mut(unit_id);
912+
let root = unit.root();
913+
914+
let caller_id = unit.add(root, constants::DW_TAG_subprogram);
915+
let caller = unit.get_mut(caller_id);
916+
caller.set(
917+
constants::DW_AT_name,
918+
WriteAttributeValue::String(b"caller".to_vec()),
919+
);
920+
caller.set(
921+
constants::DW_AT_low_pc,
922+
WriteAttributeValue::Address(Address::Constant(0x1000)),
923+
);
924+
caller.set(constants::DW_AT_high_pc, WriteAttributeValue::Udata(0x40));
925+
926+
let callee_id = unit.add(root, constants::DW_TAG_subprogram);
927+
let callee = unit.get_mut(callee_id);
928+
callee.set(
929+
constants::DW_AT_name,
930+
WriteAttributeValue::String(b"callee".to_vec()),
931+
);
932+
callee.set(
933+
constants::DW_AT_low_pc,
934+
WriteAttributeValue::Address(Address::Constant(0x1200)),
935+
);
936+
callee.set(constants::DW_AT_high_pc, WriteAttributeValue::Udata(0x10));
937+
938+
let call_site_id = unit.add(caller_id, constants::DW_TAG_call_site);
939+
let mut target_expr = WriteExpression::new();
940+
target_expr.op_addr(Address::Constant(0x1200));
941+
unit.get_mut(call_site_id).set(
942+
constants::DW_AT_call_target,
943+
WriteAttributeValue::Exprloc(target_expr),
944+
);
945+
unit.get_mut(call_site_id).set(
946+
constants::DW_AT_call_return_pc,
947+
WriteAttributeValue::Address(Address::Constant(0x1018)),
948+
);
949+
950+
let param_id = unit.add(call_site_id, constants::DW_TAG_call_site_parameter);
951+
let param = unit.get_mut(param_id);
952+
let mut location = WriteExpression::new();
953+
location.op_reg(Register(5));
954+
param.set(
955+
constants::DW_AT_location,
956+
WriteAttributeValue::Exprloc(location),
957+
);
958+
let mut value = WriteExpression::new();
959+
value.op_constu(42);
960+
param.set(
961+
constants::DW_AT_call_value,
962+
WriteAttributeValue::Exprloc(value),
963+
);
964+
965+
let mut sections = Sections::new(EndianVec::new(BigEndian));
966+
dwarf.write(&mut sections).unwrap();
967+
968+
let dwarf_sections: gimli::DwarfSections<Vec<u8>> = gimli::DwarfSections::load(|id| {
969+
Ok::<_, gimli::Error>(
970+
sections
971+
.get(id)
972+
.map(|section| section.slice().to_vec())
973+
.unwrap_or_default(),
974+
)
975+
})
976+
.unwrap();
977+
978+
let read_dwarf = dwarf_sections.borrow(|section| {
979+
dwarf_reader_from_arc_with_endian(
980+
Arc::<[u8]>::from(section.as_slice()),
981+
gimli::RunTimeEndian::Big,
982+
)
983+
});
984+
let mut units = read_dwarf.units();
985+
let header = units.next().unwrap().unwrap();
986+
let cu_offset = header.debug_info_offset().unwrap();
987+
(read_dwarf, cu_offset)
988+
}
989+
909990
#[test]
910991
fn build_for_unit_indexes_standard_call_site_values() {
911992
let (dwarf, cu_offset) = build_call_site_fixture(
@@ -1178,6 +1259,30 @@ mod tests {
11781259
);
11791260
}
11801261

1262+
#[test]
1263+
fn add_functions_links_big_endian_incoming_call_sites_by_call_target_expr() {
1264+
let (dwarf, cu_offset) = build_big_endian_call_target_expr_fixture();
1265+
let builder = BlockIndexBuilder::new(&dwarf);
1266+
let functions = builder
1267+
.build_for_unit(cu_offset)
1268+
.expect("fixture CU should build");
1269+
1270+
let mut block_index = BlockIndex::new();
1271+
block_index.add_functions(functions);
1272+
1273+
let callee = block_index
1274+
.find_function_by_pc(0x1200)
1275+
.expect("callee function should be indexed");
1276+
let incoming = callee
1277+
.incoming_call_sites
1278+
.get(&0x1018)
1279+
.map(Vec::as_slice)
1280+
.expect("callee should have one incoming call site");
1281+
assert_eq!(incoming.len(), 1);
1282+
assert_eq!(incoming[0].call_origin, None);
1283+
assert_eq!(incoming[0].call_target, Some(0x1200));
1284+
}
1285+
11811286
#[test]
11821287
fn entry_value_parameter_lookup_uses_nearest_prior_return_pc() {
11831288
let mut function = FunctionBlocks::new(gimli::DebugInfoOffset(0), gimli::UnitOffset(0));

ghostscope-dwarf/src/index/cfi_index.rs

Lines changed: 16 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
//! by utilizing eh_frame_hdr's binary search table when available.
55
66
use crate::{
7-
binary::{DwarfReader, MappedFile},
7+
binary::{dwarf_endian_from_object, DwarfReader, MappedFile},
88
core::{CallerFrameRecovery, CfaResult, ComputeStep, Result},
99
};
1010
use anyhow::{anyhow, Context};
@@ -46,6 +46,7 @@ impl CfiIndex {
4646
let object = file_data
4747
.parse_object()
4848
.context("Failed to parse object file")?;
49+
let endian = dwarf_endian_from_object(&object);
4950

5051
// Load eh_frame section (required)
5152
let eh_frame_section = object
@@ -56,9 +57,13 @@ impl CfiIndex {
5657
let (eh_frame_start, eh_frame_size) = eh_frame_section
5758
.file_range()
5859
.ok_or_else(|| anyhow!(".eh_frame section has no file range"))?;
59-
let eh_frame_reader =
60-
MappedFile::dwarf_reader_range(Arc::clone(&file_data), eh_frame_start, eh_frame_size)
61-
.ok_or_else(|| anyhow!("Invalid .eh_frame range in mapped file"))?;
60+
let eh_frame_reader = MappedFile::dwarf_reader_range(
61+
Arc::clone(&file_data),
62+
eh_frame_start,
63+
eh_frame_size,
64+
endian,
65+
)
66+
.ok_or_else(|| anyhow!("Invalid .eh_frame range in mapped file"))?;
6267
let eh_frame = EhFrame::from(eh_frame_reader);
6368

6469
// Try to load eh_frame_hdr for fast lookup (optional)
@@ -67,9 +72,13 @@ impl CfiIndex {
6772
let (hdr_start, hdr_size) = hdr_section_obj
6873
.file_range()
6974
.ok_or_else(|| anyhow!(".eh_frame_hdr section has no file range"))?;
70-
let hdr_reader =
71-
MappedFile::dwarf_reader_range(Arc::clone(&file_data), hdr_start, hdr_size)
72-
.ok_or_else(|| anyhow!("Invalid .eh_frame_hdr range in mapped file"))?;
75+
let hdr_reader = MappedFile::dwarf_reader_range(
76+
Arc::clone(&file_data),
77+
hdr_start,
78+
hdr_size,
79+
endian,
80+
)
81+
.ok_or_else(|| anyhow!("Invalid .eh_frame_hdr range in mapped file"))?;
7382
let hdr_section = EhFrameHdr::from(hdr_reader);
7483

7584
// Parse with proper address_size

ghostscope-dwarf/src/objfile/loading.rs

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
use super::LoadedObjfile;
22
use crate::{
3-
binary::{empty_dwarf_reader, try_load_debug_file, DwarfData, MappedFile},
3+
binary::{
4+
dwarf_endian_from_object, empty_dwarf_reader_with_endian, try_load_debug_file, DwarfData,
5+
MappedFile,
6+
},
47
core::{mapping::ModuleMapping, Result},
58
index::{BlockIndex, CfiIndex, TypeNameIndex},
69
parser::DetailedParser,
@@ -239,18 +242,20 @@ impl LoadedObjfile {
239242

240243
fn load_dwarf_sections(file_data: &Arc<MappedFile>) -> Result<DwarfData> {
241244
let object = file_data.parse_object()?;
245+
let endian = dwarf_endian_from_object(&object);
242246

243247
let load_section = |id: gimli::SectionId| -> Result<_> {
244248
if let Some(section) = object.section_by_name(id.name()) {
245249
if let Some((start, size)) = section.file_range() {
246-
MappedFile::dwarf_reader_range(Arc::clone(file_data), start, size).ok_or_else(
247-
|| anyhow::anyhow!("Invalid DWARF section range for {}", id.name()),
248-
)
250+
MappedFile::dwarf_reader_range(Arc::clone(file_data), start, size, endian)
251+
.ok_or_else(|| {
252+
anyhow::anyhow!("Invalid DWARF section range for {}", id.name())
253+
})
249254
} else {
250-
Ok(empty_dwarf_reader())
255+
Ok(empty_dwarf_reader_with_endian(endian))
251256
}
252257
} else {
253-
Ok(empty_dwarf_reader())
258+
Ok(empty_dwarf_reader_with_endian(endian))
254259
}
255260
};
256261

ghostscope-dwarf/src/objfile/variables.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -628,6 +628,7 @@ impl LoadedObjfile {
628628
gimli::AttributeValue::Exprloc(expr) => {
629629
ExpressionEvaluator::parse_expression_in_unit(
630630
expr.0.to_slice().ok().as_deref().unwrap_or(&[]),
631+
expr.0.endian(),
631632
&unit,
632633
dwarf,
633634
pc,

0 commit comments

Comments
 (0)