From d3c7baeeb8d9b2a64344683c5b92f108198e9de7 Mon Sep 17 00:00:00 2001 From: Daniel Bertalan Date: Sun, 15 Sep 2024 12:33:35 +0200 Subject: [PATCH 1/3] [DebugInfo] Add fast path for parsing DW_TAG_compile_unit abbrevs In `DWARFDebugInfoEntry::extractFast`, we were parsing all abbreviation declarations belonging to the compilation unit by calling the `getAbbreviations` method. This resulted in a large overhead (mostly vector resizes and ULEB128 parsing) in cases where only the Compilation Unit DIE ended up being used. As `DW_TAG_compile_unit` typically comes first in the abbreviation table, this commit adds a fast-path function (`tryExtractCUAbbrevFast`) which attempts to read only the first abbreviation, without constructing a full `DWARFAbbreviationDeclarationSet`. This significantly speeds up `ld64.lld`'s generation of `N_OSO` stab information (which needs `DW_AT_name` from the Compilation Unit DIE). The following measurement was taken on an M1 Mac Mini linking Chromium with full debug info: x: before +: after N Min Max Median Avg Stddev x 15 3.136759 4.390569 3.5234511 3.6028554 0.38726359 + 15 2.7222703 3.5872169 3.237128 3.1830136 0.31002649 Difference at 95.0% confidence -0.419842 +/- 0.26232 -11.653% +/- 7.28088% (Student's t, pooled s = 0.350777) --- .../llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h | 4 ++ llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h | 4 ++ llvm/lib/DebugInfo/DWARF/DWARFDebugAbbrev.cpp | 18 ++++++ .../DebugInfo/DWARF/DWARFDebugInfoEntry.cpp | 57 ++++++++++++------- llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp | 11 ++++ 5 files changed, 72 insertions(+), 22 deletions(-) diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h index 6439827ef70f0..18555bafdc1f0 100644 --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h @@ -62,6 +62,7 @@ class DWARFDebugAbbrev { mutable DWARFAbbreviationDeclarationSetMap AbbrDeclSets; mutable DWARFAbbreviationDeclarationSetMap::const_iterator PrevAbbrOffsetPos; mutable std::optional Data; + mutable std::map CUAbbrevs; public: DWARFDebugAbbrev(DataExtractor Data); @@ -69,6 +70,9 @@ class DWARFDebugAbbrev { Expected getAbbreviationDeclarationSet(uint64_t CUAbbrOffset) const; + Expected + tryExtractCUAbbrevFast(uint64_t CUAbbrOffset) const; + void dump(raw_ostream &OS) const; Error parse() const; diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h index 80c27aea89312..87f8742fd9d9f 100644 --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h @@ -419,6 +419,10 @@ class DWARFUnit { uint64_t getAbbreviationsOffset() const { return Header.getAbbrOffset(); } + /// Extracts only the abbreviation declaration with code 1, which is + /// typically the compile unit DIE (DW_TAG_compile_unit). + const DWARFAbbreviationDeclaration *tryExtractCUAbbrevFast() const; + const DWARFAbbreviationDeclarationSet *getAbbreviations() const; static bool isMatchingUnitTypeAndTag(uint8_t UnitType, dwarf::Tag Tag) { diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugAbbrev.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugAbbrev.cpp index 85959ecc5e17f..7944fc881e6bd 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFDebugAbbrev.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugAbbrev.cpp @@ -168,3 +168,21 @@ DWARFDebugAbbrev::getAbbreviationDeclarationSet(uint64_t CUAbbrOffset) const { .first; return &PrevAbbrOffsetPos->second; } + +Expected +DWARFDebugAbbrev::tryExtractCUAbbrevFast(uint64_t CUAbbrOffset) const { + if (auto AbbrevDecl = CUAbbrevs.find(CUAbbrOffset); + AbbrevDecl != CUAbbrevs.end()) + return &AbbrevDecl->second; + + DWARFAbbreviationDeclaration Decl; + uint64_t Offset = CUAbbrOffset; + Expected ES = + Decl.extract(*Data, &Offset); + if (!ES) + return ES.takeError(); + if (Decl.getCode() != 1) + return nullptr; + + return &(CUAbbrevs[CUAbbrOffset] = std::move(Decl)); +} diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp index 4f0a6d96ace9e..030faad13f46f 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp @@ -34,36 +34,49 @@ bool DWARFDebugInfoEntry::extractFast(const DWARFUnit &U, uint64_t *OffsetPtr, return false; } assert(DebugInfoData.isValidOffset(UEndOffset - 1)); + AbbrevDecl = nullptr; + uint64_t AbbrCode = DebugInfoData.getULEB128(OffsetPtr); if (0 == AbbrCode) { // NULL debug tag entry. - AbbrevDecl = nullptr; return true; } - const auto *AbbrevSet = U.getAbbreviations(); - if (!AbbrevSet) { - U.getContext().getWarningHandler()( - createStringError(errc::invalid_argument, - "DWARF unit at offset 0x%8.8" PRIx64 " " - "contains invalid abbreviation set offset 0x%" PRIx64, - U.getOffset(), U.getAbbreviationsOffset())); - // Restore the original offset. - *OffsetPtr = Offset; - return false; + + // Fast path: parsing the entire abbreviation table is wasteful if we only + // need the unit DIE (typically AbbrCode == 1). + if (1 == AbbrCode) { + AbbrevDecl = U.tryExtractCUAbbrevFast(); + assert(!AbbrevDecl || AbbrevDecl->getCode() == AbbrCode); } - AbbrevDecl = AbbrevSet->getAbbreviationDeclaration(AbbrCode); + if (!AbbrevDecl) { - U.getContext().getWarningHandler()( - createStringError(errc::invalid_argument, - "DWARF unit at offset 0x%8.8" PRIx64 " " - "contains invalid abbreviation %" PRIu64 " at " - "offset 0x%8.8" PRIx64 ", valid abbreviations are %s", - U.getOffset(), AbbrCode, *OffsetPtr, - AbbrevSet->getCodeRange().c_str())); - // Restore the original offset. - *OffsetPtr = Offset; - return false; + const auto *AbbrevSet = U.getAbbreviations(); + if (!AbbrevSet) { + U.getContext().getWarningHandler()(createStringError( + errc::invalid_argument, + "DWARF unit at offset 0x%8.8" PRIx64 " " + "contains invalid abbreviation set offset 0x%" PRIx64, + U.getOffset(), U.getAbbreviationsOffset())); + // Restore the original offset. + *OffsetPtr = Offset; + return false; + } + AbbrevDecl = AbbrevSet->getAbbreviationDeclaration(AbbrCode); + + if (!AbbrevDecl) { + U.getContext().getWarningHandler()(createStringError( + errc::invalid_argument, + "DWARF unit at offset 0x%8.8" PRIx64 " " + "contains invalid abbreviation %" PRIu64 " at " + "offset 0x%8.8" PRIx64 ", valid abbreviations are %s", + U.getOffset(), AbbrCode, *OffsetPtr, + AbbrevSet->getCodeRange().c_str())); + // Restore the original offset. + *OffsetPtr = Offset; + return false; + } } + // See if all attributes in this DIE have fixed byte sizes. If so, we can // just add this size to the offset to skip to the next DIE. if (std::optional FixedSize = diff --git a/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp b/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp index bdd04b00f557b..dcf323525b10e 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp @@ -1051,6 +1051,17 @@ DWARFUnit::getLastChildEntry(const DWARFDebugInfoEntry *Die) const { return nullptr; } +const DWARFAbbreviationDeclaration *DWARFUnit::tryExtractCUAbbrevFast() const { + Expected AbbrevOrError = + Abbrev->tryExtractCUAbbrevFast(getAbbreviationsOffset()); + if (!AbbrevOrError) { + // FIXME: We should propagate this error upwards. + consumeError(AbbrevOrError.takeError()); + return nullptr; + } + return *AbbrevOrError; +} + const DWARFAbbreviationDeclarationSet *DWARFUnit::getAbbreviations() const { if (!Abbrevs) { Expected AbbrevsOrError = From ca8106a8c2f36e9251cf85e81a8fd81ad0ca2e2d Mon Sep 17 00:00:00 2001 From: Daniel Bertalan Date: Sun, 15 Sep 2024 22:02:59 +0200 Subject: [PATCH 2/3] Don't crash on empty optional deref --- llvm/lib/DebugInfo/DWARF/DWARFDebugAbbrev.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugAbbrev.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugAbbrev.cpp index 7944fc881e6bd..70fd5c1a8ce33 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFDebugAbbrev.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugAbbrev.cpp @@ -175,6 +175,10 @@ DWARFDebugAbbrev::tryExtractCUAbbrevFast(uint64_t CUAbbrOffset) const { AbbrevDecl != CUAbbrevs.end()) return &AbbrevDecl->second; + if (!Data || CUAbbrOffset >= Data->getData().size()) + return make_error( + "the abbreviation offset into the .debug_abbrev section is not valid"); + DWARFAbbreviationDeclaration Decl; uint64_t Offset = CUAbbrOffset; Expected ES = From ba0d7f4165d9091b30f5293596f147abd6bf4545 Mon Sep 17 00:00:00 2001 From: Daniel Bertalan Date: Tue, 17 Sep 2024 10:38:38 +0200 Subject: [PATCH 3/3] Propagate error from tryExtractCUAbbrevFast() ... but still drop it on the floor in the end, as we want to re-try the full DIE parsing. --- llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h | 2 +- llvm/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp | 13 ++++++++++--- llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp | 12 +++--------- 3 files changed, 14 insertions(+), 13 deletions(-) diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h index 87f8742fd9d9f..8c019841035a1 100644 --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h @@ -421,7 +421,7 @@ class DWARFUnit { /// Extracts only the abbreviation declaration with code 1, which is /// typically the compile unit DIE (DW_TAG_compile_unit). - const DWARFAbbreviationDeclaration *tryExtractCUAbbrevFast() const; + Expected tryExtractCUAbbrevFast() const; const DWARFAbbreviationDeclarationSet *getAbbreviations() const; diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp index 030faad13f46f..c5156e55393f2 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp @@ -13,6 +13,7 @@ #include "llvm/DebugInfo/DWARF/DWARFFormValue.h" #include "llvm/DebugInfo/DWARF/DWARFUnit.h" #include "llvm/Support/Errc.h" +#include "llvm/Support/Error.h" #include #include @@ -44,9 +45,13 @@ bool DWARFDebugInfoEntry::extractFast(const DWARFUnit &U, uint64_t *OffsetPtr, // Fast path: parsing the entire abbreviation table is wasteful if we only // need the unit DIE (typically AbbrCode == 1). - if (1 == AbbrCode) { - AbbrevDecl = U.tryExtractCUAbbrevFast(); - assert(!AbbrevDecl || AbbrevDecl->getCode() == AbbrCode); + if (AbbrCode == 1) { + Expected DeclOrError = + U.tryExtractCUAbbrevFast(); + if (DeclOrError) + AbbrevDecl = *DeclOrError; + else + consumeError(DeclOrError.takeError()); // Retry full parsing below. } if (!AbbrevDecl) { @@ -77,6 +82,8 @@ bool DWARFDebugInfoEntry::extractFast(const DWARFUnit &U, uint64_t *OffsetPtr, } } + assert(AbbrevDecl && AbbrevDecl->getCode() == AbbrCode); + // See if all attributes in this DIE have fixed byte sizes. If so, we can // just add this size to the offset to skip to the next DIE. if (std::optional FixedSize = diff --git a/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp b/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp index dcf323525b10e..aab3f5f078304 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp @@ -1051,15 +1051,9 @@ DWARFUnit::getLastChildEntry(const DWARFDebugInfoEntry *Die) const { return nullptr; } -const DWARFAbbreviationDeclaration *DWARFUnit::tryExtractCUAbbrevFast() const { - Expected AbbrevOrError = - Abbrev->tryExtractCUAbbrevFast(getAbbreviationsOffset()); - if (!AbbrevOrError) { - // FIXME: We should propagate this error upwards. - consumeError(AbbrevOrError.takeError()); - return nullptr; - } - return *AbbrevOrError; +Expected +DWARFUnit::tryExtractCUAbbrevFast() const { + return Abbrev->tryExtractCUAbbrevFast(getAbbreviationsOffset()); } const DWARFAbbreviationDeclarationSet *DWARFUnit::getAbbreviations() const {