Skip to content

[HWASan] Fix symbol indexing #135967

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
71 changes: 56 additions & 15 deletions compiler-rt/lib/hwasan/scripts/hwasan_symbolize
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,9 @@ if sys.version_info.major < 3:
# Size of the ELF file header and byte offsets of the header fields this
# script reads. handle_elf only accepts ELFCLASS64 little-endian images, so
# these are presumably the ELF64 layout values.
Ehdr_size = 64
e_shnum_offset = 60
e_shoff_offset = 40

e_shstrndx_offset = 62
# Size of one section header table entry and byte offsets of the fields read
# from each entry.
Shdr_size = 64
sh_name_offset = 0
sh_type_offset = 4
sh_offset_offset = 24
sh_size_offset = 32
Expand All @@ -62,33 +63,70 @@ def handle_Nhdr(mv, sh_size):
offset += Nhdr_size + align_up(n_namesz, 4) + align_up(n_descsz, 4)
return None

def handle_Shdr(mv):
def handle_shstrtab(mv, e_shoff):
  """Return a view of the section that stores the section header names.

  mv is a buffer over the whole ELF image and e_shoff is the offset of the
  section header table inside it. The ELF header's e_shstrndx field selects
  which entry of that table describes the name section; the returned slice
  covers that section's contents.
  """
  (strtab_index,) = struct.unpack_from('<H', buffer=mv, offset=e_shstrndx_offset)

  # Locate the section header entry that describes the name table.
  hdr_begin = e_shoff + strtab_index * Shdr_size
  hdr_end = hdr_begin + Shdr_size
  _, data_offset, data_size = handle_Shdr(mv[hdr_begin:hdr_end])

  return mv[data_offset:data_offset + data_size]

def shstrtab_sh_name(mv):
  """Read a NUL-terminated string from the start of mv.

  mv is an iterable of byte values (bytes or a memoryview), typically the
  section name table sliced at a section's name offset. Each byte is mapped
  to the character with the same code point. Reading stops at the first
  zero byte, or at the end of mv if there is none.

  Returns the decoded name as a str; empty if mv is empty or starts with NUL.
  """
  # NOTE(review): a reviewer of this change suggested the simpler name
  # read_string; the current name is kept so existing callers keep working.
  chars = []
  for byte in mv:
    if byte == 0:
      break
    chars.append(chr(byte))
  # Join once instead of growing a str per character.
  return "".join(chars)

def unpack_sh_type(mv):
  """Return the sh_type field of the section header held in mv.

  mv is a buffer covering one section header entry. The field is read as a
  little-endian 32-bit unsigned integer at sh_type_offset and is always
  returned as an int, so callers can compare it against constants such as
  SHT_NOTE.
  """
  sh_type, = struct.unpack_from('<I', buffer=mv, offset=sh_type_offset)
  return sh_type

def handle_Shdr(mv):
  """Decode the fields of one section header that the symbolizer needs.

  mv is a buffer covering a single section header entry.

  Returns a (name_offset, sh_offset, sh_size) tuple:
    name_offset: offset of the section's name inside the name string table.
    sh_offset:   offset of the section's contents inside the ELF image.
    sh_size:     size of the section's contents in bytes.
  """
  name_offset, = struct.unpack_from('<I', buffer=mv, offset=sh_name_offset)
  sh_offset, = struct.unpack_from('<Q', buffer=mv, offset=sh_offset_offset)
  sh_size, = struct.unpack_from('<Q', buffer=mv, offset=sh_size_offset)
  return name_offset, sh_offset, sh_size

def handle_elf(mv):
  """Return the build ID of the ELF image in mv, if it has debug info.

  mv is a buffer over the whole file. Every section header is visited:
  a section named ".debug_info" marks the file as carrying symbols, and each
  SHT_NOTE section is passed to handle_Nhdr to look for the build ID note.

  Returns None when mv is not a 64-bit little-endian ELF image or when no
  ".debug_info" section exists. Otherwise returns the value handle_Nhdr
  produced for the last note section that yielded one (None if none did).
  """
  # \x02 is ELFCLASS64, \x01 is ELFDATA2LSB. HWASan currently only works on
  # 64-bit little endian platforms (x86_64 and ARM64). If this changes, we will
  # have to extend the parsing code.
  if mv[:6] != b'\x7fELF\x02\x01':
    return None
  found_symbols = False
  bid = None
  e_shnum, = struct.unpack_from('<H', buffer=mv, offset=e_shnum_offset)
  e_shoff, = struct.unpack_from('<Q', buffer=mv, offset=e_shoff_offset)

  # Section where all the section header names are stored
  shstr = handle_shstrtab(mv, e_shoff)

  for i in range(e_shnum):
    start = e_shoff + i * Shdr_size
    sh = mv[start: start + Shdr_size]
    # Use a distinct local name so the module-level sh_name_offset constant
    # is not shadowed inside this function.
    name_offset, sh_offset, sh_size = handle_Shdr(sh)
    sh_name = shstrtab_sh_name(shstr[name_offset:])

    if sh_name == ".debug_info":
      found_symbols = True
    if unpack_sh_type(sh) == SHT_NOTE:
      note_hdr = mv[sh_offset: sh_offset + sh_size]
      result = handle_Nhdr(note_hdr, sh_size)
      if result is not None:
        bid = result

  # A build ID is only useful for indexing when the file also has symbols.
  return bid if found_symbols else None

def get_buildid(filename):
def read_elf(filename):
with open(filename, "r") as fd:
if os.fstat(fd.fileno()).st_size < Ehdr_size:
return None
Expand Down Expand Up @@ -200,7 +238,7 @@ class Symbolizer:
if os.path.exists(full_path):
return full_path
if name not in self.__warnings:
print("Could not find symbols for", name, file=sys.stderr)
print("Could not find symbols for {} (Build ID: {})".format(name, buildid), file=sys.stderr)
self.__warnings.add(name)
return None

Expand Down Expand Up @@ -268,13 +306,16 @@ class Symbolizer:
for fn in fnames:
filename = os.path.join(dname, fn)
try:
bid = get_buildid(filename)
bid = read_elf(filename)
except FileNotFoundError:
continue
except Exception as e:
print("Failed to parse {}: {}".format(filename, e), file=sys.stderr)
continue
if bid is not None:
if bid in self.__index:
print("Duplicate build ID {} for {} and {}".format(bid, self.__index[bid], filename), file=sys.stderr)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we want to crash because we have the same file twice? Couldn't this happen through symlinks or something?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, I agree that this is too harsh. Maybe we emit in verbose mode that two shared objects were found with the same build id at different locations?

Copy link
Contributor

@fmayer fmayer Apr 24, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A warning in verbose mode is OK. Preferably we would only emit it if the files actually differ, but that's not a hard requirement.

sys.exit(1)
self.__index[bid] = filename

def symbolize_line(self, line):
Expand Down