Skip to content

Commit 33a1f9c

Browse files
Rework executable id generation (#143)
The executable id is now based on the first 8 bytes of the build id, which reduces the I/O and CPU needed to hash the code section of every binary. Hashing is now only done for executables without a GNU or Go build id. Besides the performance improvement, this will standardise how build ids are handled once kernel code sections (main executable but also modules) are processed. Note: this is a backwards-incompatible change, as the executable id will change for GNU and Go binaries. Test Plan ========= Unit tests + ran the profiler for a while without issues
1 parent f5c36ed commit 33a1f9c

File tree

6 files changed

+95
-64
lines changed

6 files changed

+95
-64
lines changed

Cargo.lock

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ itertools = "0.14.0"
5151
lightswitch-metadata = { path = "lightswitch-metadata", version = "0.1.0" }
5252
lightswitch-proto = { path = "lightswitch-proto", version = "0.1.0" }
5353
lightswitch-capabilities = { path = "lightswitch-capabilities", version = "0.1.0" }
54-
lightswitch-object = { path = "lightswitch-object", version = "0.1.0" }
54+
lightswitch-object = { path = "lightswitch-object", version = "0.2.0" }
5555
memmap2 = { workspace = true }
5656
anyhow = { workspace = true }
5757
object = { workspace = true }

lightswitch-object/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "lightswitch-object"
3-
version = "0.1.1"
3+
version = "0.2.0"
44
edition = "2021"
55
description = "Deals with object files"
66
license = "MIT"

lightswitch-object/src/buildid.rs

Lines changed: 71 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -1,55 +1,97 @@
11
use std::fmt;
2+
use std::fmt::Debug;
23
use std::fmt::Display;
34
use std::fmt::Formatter;
45
use std::str;
56

6-
use anyhow::Result;
77
use data_encoding::HEXLOWER;
88
use ring::digest::Digest;
99

10+
#[derive(Hash, Eq, PartialEq, Clone)]
11+
pub enum BuildIdFlavour {
12+
Gnu,
13+
Go,
14+
Sha256,
15+
}
16+
1017
/// Represents a build id, which could be either a GNU build ID, the build
1118
/// ID from Go, or a Sha256 hash of the code in the .text section.
12-
#[derive(Hash, Eq, PartialEq, Clone, Debug)]
13-
pub enum BuildId {
14-
Gnu(String),
15-
Go(String),
16-
Sha256(String),
19+
#[derive(Hash, Eq, PartialEq, Clone)]
20+
pub struct BuildId {
21+
pub flavour: BuildIdFlavour,
22+
pub data: Vec<u8>,
1723
}
1824

1925
impl BuildId {
2026
pub fn gnu_from_bytes(bytes: &[u8]) -> Self {
21-
BuildId::Gnu(
22-
bytes
23-
.iter()
24-
.map(|b| format!("{:02x}", b))
25-
.collect::<Vec<_>>()
26-
.join(""),
27-
)
27+
BuildId {
28+
flavour: BuildIdFlavour::Gnu,
29+
data: bytes.to_vec(),
30+
}
2831
}
2932

30-
pub fn go_from_bytes(bytes: &[u8]) -> Result<Self> {
31-
Ok(BuildId::Go(str::from_utf8(bytes)?.to_string()))
33+
pub fn go_from_bytes(bytes: &[u8]) -> Self {
34+
BuildId {
35+
flavour: BuildIdFlavour::Go,
36+
data: bytes.to_vec(),
37+
}
3238
}
3339

3440
pub fn sha256_from_digest(digest: &Digest) -> Self {
35-
BuildId::Sha256(HEXLOWER.encode(digest.as_ref()))
41+
BuildId {
42+
flavour: BuildIdFlavour::Sha256,
43+
data: digest.as_ref().to_vec(),
44+
}
3645
}
37-
}
3846

39-
impl Display for BuildId {
40-
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
41-
match self {
42-
BuildId::Gnu(build_id) => {
43-
write!(f, "gnu-{}", build_id)
44-
}
45-
BuildId::Go(build_id) => {
46-
write!(f, "go-{}", build_id)
47+
pub fn build_id_formatted(&self) -> String {
48+
match self.flavour {
49+
BuildIdFlavour::Gnu => {
50+
self.data
51+
.iter()
52+
.fold(String::with_capacity(self.data.len() * 2), |mut res, el| {
53+
res.push_str(&format!("{:02x}", el));
54+
res
55+
})
4756
}
48-
BuildId::Sha256(build_id) => {
49-
write!(f, "sha256-{}", build_id)
57+
BuildIdFlavour::Go => {
58+
match str::from_utf8(&self.data) {
59+
Ok(res) => res.to_string(),
60+
// This should never happen in practice.
61+
Err(e) => format!("error converting go build id: {}", e),
62+
}
5063
}
64+
BuildIdFlavour::Sha256 => HEXLOWER.encode(self.data.as_ref()),
5165
}
5266
}
67+
68+
pub fn formatted(&self) -> String {
69+
format!("{}-{}", self.flavour, self.build_id_formatted())
70+
}
71+
}
72+
73+
impl Display for BuildIdFlavour {
74+
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
75+
let name = match self {
76+
BuildIdFlavour::Gnu => "gnu",
77+
BuildIdFlavour::Go => "go",
78+
BuildIdFlavour::Sha256 => "sha256",
79+
};
80+
81+
write!(f, "{}", name)
82+
}
83+
}
84+
85+
impl Display for BuildId {
86+
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
87+
write!(f, "{}", self.formatted())
88+
}
89+
}
90+
91+
impl Debug for BuildId {
92+
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
93+
write!(f, "BuildId({})", self.formatted())
94+
}
5395
}
5496

5597
#[cfg(test)]
@@ -58,15 +100,13 @@ mod tests {
58100
use ring::digest::{Context, SHA256};
59101

60102
#[test]
61-
fn test_buildid_constructors() {
103+
fn test_buildid() {
62104
assert_eq!(
63105
BuildId::gnu_from_bytes(&[0xbe, 0xef, 0xca, 0xfe]).to_string(),
64106
"gnu-beefcafe"
65107
);
66108
assert_eq!(
67-
BuildId::go_from_bytes("fake".as_bytes())
68-
.unwrap()
69-
.to_string(),
109+
BuildId::go_from_bytes("fake".as_bytes()).to_string(),
70110
"go-fake"
71111
);
72112

@@ -78,11 +118,4 @@ mod tests {
78118
"sha256-b80ad5b1508835ca2191ac800f4bb1a5ae1c3e47f13a8f5ed1b1593337ae5af5"
79119
);
80120
}
81-
82-
#[test]
83-
fn test_buildid_display() {
84-
assert_eq!(BuildId::Gnu("fake".into()).to_string(), "gnu-fake");
85-
assert_eq!(BuildId::Go("fake".into()).to_string(), "go-fake");
86-
assert_eq!(BuildId::Sha256("fake".into()).to_string(), "sha256-fake");
87-
}
88121
}

lightswitch-object/src/object.rs

Lines changed: 17 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ pub struct ObjectFile {
3939
/// before. Rust guarantees that fields are dropped in the order they are defined.
4040
object: object::File<'static>, // Its lifetime is tied to the `mmap` below.
4141
mmap: Box<Mmap>,
42-
code_hash: Digest,
42+
build_id: BuildId,
4343
}
4444

4545
impl ObjectFile {
@@ -54,27 +54,28 @@ impl ObjectFile {
5454
// `object` by defining `object` before.
5555
let object =
5656
unsafe { std::mem::transmute::<object::File<'_>, object::File<'static>>(object) };
57-
let Some(code_hash) = code_hash(&object) else {
58-
return Err(anyhow!("code hash is None"));
59-
};
57+
let build_id = Self::read_build_id(&object)?;
58+
6059
Ok(ObjectFile {
6160
object,
6261
mmap,
63-
code_hash,
62+
build_id,
6463
})
6564
}
6665

67-
/// Returns an identifier for the executable using the first 8 bytes of the Sha256 of the code section.
66+
/// Returns an identifier for the executable using the first 8 bytes of the build id.
6867
pub fn id(&self) -> Result<ExecutableId> {
69-
let mut buffer = [0; 8];
70-
let _ = self.code_hash.as_ref().read(&mut buffer)?;
71-
Ok(u64::from_ne_bytes(buffer))
68+
Ok(u64::from_ne_bytes(self.build_id.data[..8].try_into()?))
69+
}
70+
71+
/// Returns the executable build ID.
72+
pub fn build_id(&self) -> &BuildId {
73+
&self.build_id
7274
}
7375

7476
/// Returns the executable build ID if present. If no GNU build ID and no Go build ID
7577
/// are found it returns the hash of the text section.
76-
pub fn build_id(&self) -> Result<BuildId> {
77-
let object = &self.object;
78+
pub fn read_build_id(object: &object::File<'static>) -> Result<BuildId> {
7879
let gnu_build_id = object.build_id()?;
7980

8081
if let Some(data) = gnu_build_id {
@@ -85,13 +86,16 @@ impl ObjectFile {
8586
for section in object.sections() {
8687
if section.name()? == ".note.go.buildid" {
8788
if let Ok(data) = section.data() {
88-
return BuildId::go_from_bytes(data);
89+
return Ok(BuildId::go_from_bytes(data));
8990
}
9091
}
9192
}
9293

9394
// No build id (Rust, some compilers and Linux distributions).
94-
Ok(BuildId::sha256_from_digest(&self.code_hash))
95+
let Some(code_hash) = code_hash(object) else {
96+
return Err(anyhow!("code hash is None"));
97+
};
98+
Ok(BuildId::sha256_from_digest(&code_hash))
9599
}
96100

97101
/// Returns whether the object has debug symbols.

src/profiler.rs

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1591,12 +1591,9 @@ impl Profiler {
15911591
return Err(anyhow!("Go applications are not supported yet"));
15921592
}
15931593

1594-
let Ok(build_id) = object_file.build_id() else {
1595-
continue;
1596-
};
1597-
1594+
let build_id = object_file.build_id();
15981595
let Ok(executable_id) = object_file.id() else {
1599-
debug!("could not get id for object file: {}", abs_path);
1596+
info!("could not get id for object file: {}", abs_path);
16001597
continue;
16011598
};
16021599

@@ -1637,7 +1634,7 @@ impl Profiler {
16371634
};
16381635
let res = self.debug_info_manager.add_if_not_present(
16391636
&name,
1640-
&build_id,
1637+
build_id,
16411638
executable_id,
16421639
&abs_path,
16431640
);
@@ -1701,10 +1698,6 @@ impl Profiler {
17011698
debug!("vDSO object file id failed");
17021699
continue;
17031700
};
1704-
let Ok(build_id) = object_file.build_id() else {
1705-
debug!("vDSO object file build_id failed");
1706-
continue;
1707-
};
17081701
let Ok(file) = std::fs::File::open(&vdso_path) else {
17091702
debug!("vDSO object file open failed");
17101703
continue;
@@ -1713,6 +1706,7 @@ impl Profiler {
17131706
debug!("vDSO elf_load_segments failed");
17141707
continue;
17151708
};
1709+
let build_id = object_file.build_id().clone();
17161710

17171711
object_files.insert(
17181712
executable_id,

0 commit comments

Comments
 (0)