-
Notifications
You must be signed in to change notification settings - Fork 13.3k
[SHT_LLVM_BB_ADDR_MAP] Add a new PGOAnalysisMap feature to emit dynamic instruction count #119303
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
@llvm/pr-subscribers-llvm-binary-utilities Author: Lei Wang (wlei-llvm) ChangesAdd a new Patch is 28.73 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/119303.diff 15 Files Affected:
diff --git a/llvm/include/llvm/Object/ELFTypes.h b/llvm/include/llvm/Object/ELFTypes.h
index 87e4dbe4480910..5a80a05431d823 100644
--- a/llvm/include/llvm/Object/ELFTypes.h
+++ b/llvm/include/llvm/Object/ELFTypes.h
@@ -831,8 +831,11 @@ struct BBAddrMap {
bool BrProb : 1;
bool MultiBBRange : 1;
bool OmitBBEntries : 1;
+ bool DynamicInstCount : 1;
- bool hasPGOAnalysis() const { return FuncEntryCount || BBFreq || BrProb; }
+ bool hasPGOAnalysis() const {
+ return FuncEntryCount || BBFreq || BrProb || DynamicInstCount;
+ }
bool hasPGOAnalysisBBData() const { return BBFreq || BrProb; }
@@ -842,7 +845,8 @@ struct BBAddrMap {
(static_cast<uint8_t>(BBFreq) << 1) |
(static_cast<uint8_t>(BrProb) << 2) |
(static_cast<uint8_t>(MultiBBRange) << 3) |
- (static_cast<uint8_t>(OmitBBEntries) << 4);
+ (static_cast<uint8_t>(OmitBBEntries) << 4) |
+ (static_cast<uint8_t>(DynamicInstCount) << 5);
}
// Decodes from minimum bit width representation and validates no
@@ -851,7 +855,7 @@ struct BBAddrMap {
Features Feat{
static_cast<bool>(Val & (1 << 0)), static_cast<bool>(Val & (1 << 1)),
static_cast<bool>(Val & (1 << 2)), static_cast<bool>(Val & (1 << 3)),
- static_cast<bool>(Val & (1 << 4))};
+ static_cast<bool>(Val & (1 << 4)), static_cast<bool>(Val & (1 << 5))};
if (Feat.encode() != Val)
return createStringError(
std::error_code(), "invalid encoding for BBAddrMap::Features: 0x%x",
@@ -861,9 +865,10 @@ struct BBAddrMap {
bool operator==(const Features &Other) const {
return std::tie(FuncEntryCount, BBFreq, BrProb, MultiBBRange,
- OmitBBEntries) ==
+ OmitBBEntries, DynamicInstCount) ==
std::tie(Other.FuncEntryCount, Other.BBFreq, Other.BrProb,
- Other.MultiBBRange, Other.OmitBBEntries);
+ Other.MultiBBRange, Other.OmitBBEntries,
+ Other.DynamicInstCount);
}
};
@@ -1016,14 +1021,16 @@ struct PGOAnalysisMap {
};
uint64_t FuncEntryCount; // Prof count from IR function
+ uint64_t DynamicInstCount; // Dynamic instruction count
std::vector<PGOBBEntry> BBEntries; // Extended basic block entries
// Flags to indicate if each PGO related info was enabled in this function
BBAddrMap::Features FeatEnable;
bool operator==(const PGOAnalysisMap &Other) const {
- return std::tie(FuncEntryCount, BBEntries, FeatEnable) ==
- std::tie(Other.FuncEntryCount, Other.BBEntries, Other.FeatEnable);
+ return std::tie(FuncEntryCount, DynamicInstCount, BBEntries, FeatEnable) ==
+ std::tie(Other.FuncEntryCount, Other.DynamicInstCount,
+ Other.BBEntries, Other.FeatEnable);
}
};
diff --git a/llvm/include/llvm/ObjectYAML/ELFYAML.h b/llvm/include/llvm/ObjectYAML/ELFYAML.h
index dfdfa055d65fa6..17e9e56f8542eb 100644
--- a/llvm/include/llvm/ObjectYAML/ELFYAML.h
+++ b/llvm/include/llvm/ObjectYAML/ELFYAML.h
@@ -192,6 +192,7 @@ struct PGOAnalysisMapEntry {
std::optional<std::vector<SuccessorEntry>> Successors;
};
std::optional<uint64_t> FuncEntryCount;
+ std::optional<uint64_t> DynamicInstCount;
std::optional<std::vector<PGOBBEntry>> PGOBBEntries;
};
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 3072edc5088e2a..a3f46d3edd5099 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -143,6 +143,7 @@ using namespace llvm;
enum class PGOMapFeaturesEnum {
None,
FuncEntryCount,
+ DynInstCount,
BBFreq,
BrProb,
All,
@@ -153,6 +154,8 @@ static cl::bits<PGOMapFeaturesEnum> PgoAnalysisMapFeatures(
clEnumValN(PGOMapFeaturesEnum::None, "none", "Disable all options"),
clEnumValN(PGOMapFeaturesEnum::FuncEntryCount, "func-entry-count",
"Function Entry Count"),
+ clEnumValN(PGOMapFeaturesEnum::DynInstCount, "dyn-inst-count",
+ "Dynamic instruction count"),
clEnumValN(PGOMapFeaturesEnum::BBFreq, "bb-freq",
"Basic Block Frequency"),
clEnumValN(PGOMapFeaturesEnum::BrProb, "br-prob", "Branch Probability"),
@@ -1412,6 +1415,9 @@ getBBAddrMapFeature(const MachineFunction &MF, int NumMBBSectionRanges) {
bool FuncEntryCountEnabled =
AllFeatures || (!NoFeatures && PgoAnalysisMapFeatures.isSet(
PGOMapFeaturesEnum::FuncEntryCount));
+ bool DynInstCountEnabled =
+ AllFeatures || (!NoFeatures && PgoAnalysisMapFeatures.isSet(
+ PGOMapFeaturesEnum::DynInstCount));
bool BBFreqEnabled =
AllFeatures ||
(!NoFeatures && PgoAnalysisMapFeatures.isSet(PGOMapFeaturesEnum::BBFreq));
@@ -1424,9 +1430,12 @@ getBBAddrMapFeature(const MachineFunction &MF, int NumMBBSectionRanges) {
"BB entries info is required for BBFreq and BrProb "
"features");
}
- return {FuncEntryCountEnabled, BBFreqEnabled, BrProbEnabled,
+ return {FuncEntryCountEnabled,
+ BBFreqEnabled,
+ BrProbEnabled,
MF.hasBBSections() && NumMBBSectionRanges > 1,
- static_cast<bool>(BBAddrMapSkipEmitBBEntries)};
+ static_cast<bool>(BBAddrMapSkipEmitBBEntries),
+ DynInstCountEnabled};
}
void AsmPrinter::emitBBAddrMapSection(const MachineFunction &MF) {
@@ -1519,7 +1528,7 @@ void AsmPrinter::emitBBAddrMapSection(const MachineFunction &MF) {
MaybeEntryCount ? MaybeEntryCount->getCount() : 0);
}
const MachineBlockFrequencyInfo *MBFI =
- Features.BBFreq
+ Features.BBFreq || Features.DynamicInstCount
? &getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI()
: nullptr;
const MachineBranchProbabilityInfo *MBPI =
@@ -1527,8 +1536,13 @@ void AsmPrinter::emitBBAddrMapSection(const MachineFunction &MF) {
? &getAnalysis<MachineBranchProbabilityInfoWrapperPass>().getMBPI()
: nullptr;
- if (Features.BBFreq || Features.BrProb) {
+ if (Features.BBFreq || Features.BrProb || Features.DynamicInstCount) {
+ uint64_t DynInstCount = 0;
for (const MachineBasicBlock &MBB : MF) {
+ if (Features.DynamicInstCount) {
+ DynInstCount +=
+ MBFI->getBlockProfileCount(&MBB).value_or(0) * MBB.size();
+ }
if (Features.BBFreq) {
OutStreamer->AddComment("basic block frequency");
OutStreamer->emitULEB128IntValue(
@@ -1547,6 +1561,10 @@ void AsmPrinter::emitBBAddrMapSection(const MachineFunction &MF) {
}
}
}
+ if (Features.DynamicInstCount) {
+ OutStreamer->AddComment("Dynamic instruction count");
+ OutStreamer->emitULEB128IntValue(DynInstCount);
+ }
}
}
diff --git a/llvm/lib/Object/ELF.cpp b/llvm/lib/Object/ELF.cpp
index b6d0699ee4fe08..549d1de2a83f7c 100644
--- a/llvm/lib/Object/ELF.cpp
+++ b/llvm/lib/Object/ELF.cpp
@@ -887,6 +887,12 @@ decodeBBAddrMapImpl(const ELFFile<ELFT> &EF,
? readULEB128As<uint64_t>(Data, Cur, ULEBSizeErr)
: 0;
+ // Dynamic instruction count
+ uint64_t DynInstCount =
+ FeatEnable.DynamicInstCount
+ ? readULEB128As<uint64_t>(Data, Cur, ULEBSizeErr)
+ : 0;
+
std::vector<PGOAnalysisMap::PGOBBEntry> PGOBBEntries;
for (uint32_t BlockIndex = 0;
FeatEnable.hasPGOAnalysisBBData() && !MetadataDecodeErr &&
@@ -915,8 +921,8 @@ decodeBBAddrMapImpl(const ELFFile<ELFT> &EF,
}
if (PGOAnalyses)
- PGOAnalyses->push_back(
- {FuncEntryCount, std::move(PGOBBEntries), FeatEnable});
+ PGOAnalyses->push_back({FuncEntryCount, DynInstCount,
+ std::move(PGOBBEntries), FeatEnable});
}
}
// Either Cur is in the error state, or we have an error in ULEBSizeErr or
diff --git a/llvm/lib/ObjectYAML/ELFEmitter.cpp b/llvm/lib/ObjectYAML/ELFEmitter.cpp
index 5daf6c32ec936a..5f8675c0e6b107 100644
--- a/llvm/lib/ObjectYAML/ELFEmitter.cpp
+++ b/llvm/lib/ObjectYAML/ELFEmitter.cpp
@@ -1515,6 +1515,9 @@ void ELFState<ELFT>::writeSectionContent(
if (PGOEntry.FuncEntryCount)
SHeader.sh_size += CBA.writeULEB128(*PGOEntry.FuncEntryCount);
+ if (PGOEntry.DynamicInstCount)
+ SHeader.sh_size += CBA.writeULEB128(*PGOEntry.DynamicInstCount);
+
if (!PGOEntry.PGOBBEntries)
continue;
diff --git a/llvm/lib/ObjectYAML/ELFYAML.cpp b/llvm/lib/ObjectYAML/ELFYAML.cpp
index ca0ea03452d3be..424cbd45f0e01e 100644
--- a/llvm/lib/ObjectYAML/ELFYAML.cpp
+++ b/llvm/lib/ObjectYAML/ELFYAML.cpp
@@ -1872,6 +1872,7 @@ void MappingTraits<ELFYAML::PGOAnalysisMapEntry>::mapping(
IO &IO, ELFYAML::PGOAnalysisMapEntry &E) {
assert(IO.getContext() && "The IO context is not initialized");
IO.mapOptional("FuncEntryCount", E.FuncEntryCount);
+ IO.mapOptional("DynamicInstCount", E.DynamicInstCount);
IO.mapOptional("PGOBBEntries", E.PGOBBEntries);
}
diff --git a/llvm/test/CodeGen/X86/basic-block-address-map-pgo-features.ll b/llvm/test/CodeGen/X86/basic-block-address-map-pgo-features.ll
index 63779727ec72c6..0e49427ec3efe8 100644
--- a/llvm/test/CodeGen/X86/basic-block-address-map-pgo-features.ll
+++ b/llvm/test/CodeGen/X86/basic-block-address-map-pgo-features.ll
@@ -3,13 +3,14 @@
; RUN: llc < %s -mtriple=x86_64 -function-sections -unique-section-names=true -basic-block-address-map -pgo-analysis-map=none | FileCheck %s --check-prefixes=CHECK,BASIC,PGO-NONE
;; Also verify this holds for all PGO features enabled
-; RUN: llc < %s -mtriple=x86_64 -function-sections -unique-section-names=true -basic-block-address-map -pgo-analysis-map=func-entry-count,bb-freq,br-prob | FileCheck %s --check-prefixes=CHECK,PGO-ALL,PGO-FEC,PGO-BBF,PGO-BRP
-; RUN: llc < %s -mtriple=x86_64 -function-sections -unique-section-names=true -basic-block-address-map -pgo-analysis-map=all | FileCheck %s --check-prefixes=CHECK,PGO-ALL,PGO-FEC,PGO-BBF,PGO-BRP
+; RUN: llc < %s -mtriple=x86_64 -function-sections -unique-section-names=true -basic-block-address-map -pgo-analysis-map=func-entry-count,bb-freq,br-prob,dyn-inst-count | FileCheck %s --check-prefixes=CHECK,PGO-ALL,PGO-FEC,PGO-BBF,PGO-BRP
+; RUN: llc < %s -mtriple=x86_64 -function-sections -unique-section-names=true -basic-block-address-map -pgo-analysis-map=all | FileCheck %s --check-prefixes=CHECK,PGO-ALL,PGO-FEC,PGO-BBF,PGO-BRP,PGO-DINST
;; Also verify that pgo extension only includes the enabled feature
; RUN: llc < %s -mtriple=x86_64 -function-sections -unique-section-names=true -basic-block-address-map -pgo-analysis-map=func-entry-count | FileCheck %s --check-prefixes=CHECK,PGO-FEC,FEC-ONLY
; RUN: llc < %s -mtriple=x86_64 -function-sections -unique-section-names=true -basic-block-address-map -pgo-analysis-map=bb-freq | FileCheck %s --check-prefixes=CHECK,PGO-BBF,BBF-ONLY
; RUN: llc < %s -mtriple=x86_64 -function-sections -unique-section-names=true -basic-block-address-map -pgo-analysis-map=br-prob | FileCheck %s --check-prefixes=CHECK,PGO-BRP,BRP-ONLY
+; RUN: llc < %s -mtriple=x86_64 -function-sections -unique-section-names=true -basic-block-address-map -pgo-analysis-map=dyn-inst-count | FileCheck %s --check-prefixes=CHECK,PGO-DINST,DINST-ONLY
; RUN: llc < %s -mtriple=x86_64 -function-sections -unique-section-names=true -basic-block-address-map -pgo-analysis-map=func-entry-count -basic-block-address-map-skip-bb-entries | FileCheck %s --check-prefixes=SKIP-BB-ENTRIES
; RUN: not llc < %s -mtriple=x86_64 -function-sections -unique-section-names=true -basic-block-address-map -pgo-analysis-map=bb-freq -basic-block-address-map-skip-bb-entries 2>&1 | FileCheck %s --check-prefixes=SKIP-BB-ENTRIES-ERROR
@@ -71,8 +72,9 @@ declare i32 @__gxx_personality_v0(...)
; CHECK: .section .llvm_bb_addr_map,"o",@llvm_bb_addr_map,.text._Z3bazb{{$}}
; CHECK-NEXT: .byte 2 # version
; BASIC-NEXT: .byte 0 # feature
-; PGO-ALL-NEXT: .byte 7 # feature
+; PGO-ALL-NEXT: .byte 39 # feature
; FEC-ONLY-NEXT:.byte 1 # feature
+; DINST-ONLY-NEXT:.byte 32 # feature
; BBF-ONLY-NEXT:.byte 2 # feature
; BRP-ONLY-NEXT:.byte 4 # feature
; CHECK-NEXT: .quad .Lfunc_begin0 # function address
@@ -138,6 +140,8 @@ declare i32 @__gxx_personality_v0(...)
; PGO-BRP-NEXT: .byte 5 # successor BB ID
; PGO-BRP-NEXT: .ascii "\200\200\200\200\b" # successor branch probability
+; PGO-DINST: .ascii "\341\f" # Dynamic instruction count
+
; SKIP-BB-ENTRIES: .byte 17 # feature
; SKIP-BB-ENTRIES-NEXT: .quad .Lfunc_begin0 # function address
; SKIP-BB-ENTRIES-NEXT: .byte 6 # number of basic blocks
diff --git a/llvm/test/tools/llvm-objdump/X86/elf-pgoanalysismap.yaml b/llvm/test/tools/llvm-objdump/X86/elf-pgoanalysismap.yaml
index 4d1e5408d86d4d..26466c8ff48ffa 100644
--- a/llvm/test/tools/llvm-objdump/X86/elf-pgoanalysismap.yaml
+++ b/llvm/test/tools/llvm-objdump/X86/elf-pgoanalysismap.yaml
@@ -43,9 +43,49 @@ Symbols:
# ENTRYCOUNT: <foo>:
# ENTRYCOUNT: <BB3> (Entry count: 1000):
+## Check the case where we only have dynamic instruction count.
+# RUN: yaml2obj --docnum=2 %s -o %t1
+# RUN: llvm-objdump %t1 -d --symbolize-operands --no-show-raw-insn --no-leading-addr | \
+# RUN: FileCheck %s --check-prefix=DYNICNT
+
+--- !ELF
+FileHeader:
+ Class: ELFCLASS64
+ Data: ELFDATA2LSB
+ Type: ET_EXEC
+ Machine: EM_X86_64
+Sections:
+ - Name: .text.foo
+ Type: SHT_PROGBITS
+ Address: 0x0
+ Flags: [SHF_ALLOC, SHF_EXECINSTR]
+ Content: '50'
+ - Name: .llvm_bb_addr_map.foo
+ Type: SHT_LLVM_BB_ADDR_MAP
+ Link: .text.foo
+ Entries:
+ - Version: 2
+ Feature: 0x20
+ BBRanges:
+ - BaseAddress: 0x0
+ BBEntries:
+ - ID: 3
+ AddressOffset: 0x0
+ Size: 0x1
+ Metadata: 0x1
+ PGOAnalyses:
+ - DynamicInstCount: 1000
+Symbols:
+ - Name: foo
+ Section: .text.foo
+ Value: 0x0
+
+# DYNICNT: <foo>:
+# DYNICNT: <BB3> (Dynamic instruction count: 1000):
+
## Check the case where we have entry points and block frequency information
-# RUN: yaml2obj %s --docnum=2 -o %t2
+# RUN: yaml2obj %s --docnum=3 -o %t2
# RUN: llvm-objdump %t2 -d --symbolize-operands --no-show-raw-insn --no-leading-addr | \
# RUN: FileCheck --match-full-lines --strict-whitespace %s --check-prefix=ENTRYCOUNT-BLOCKFREQ
# RUN: llvm-objdump %t2 -d --symbolize-operands --pretty-pgo-analysis-map --no-show-raw-insn --no-leading-addr | \
@@ -68,7 +108,7 @@ Sections:
Link: .text.foo
Entries:
- Version: 2
- Feature: 0x3
+ Feature: 0x23
BBRanges:
- BaseAddress: 0x0
BBEntries:
@@ -90,6 +130,7 @@ Sections:
Metadata: 0x2
PGOAnalyses:
- FuncEntryCount: 1000
+ DynamicInstCount: 2000
PGOBBEntries:
- BBFreq: 1000
- BBFreq: 133
@@ -101,13 +142,13 @@ Symbols:
Value: 0x0
# ENTRYCOUNT-BLOCKFREQ:<foo>:
-# ENTRYCOUNT-BLOCKFREQ:<BB3> (Entry count: 1000, Frequency: 1000):
+# ENTRYCOUNT-BLOCKFREQ:<BB3> (Entry count: 1000, Dynamic instruction count: 2000, Frequency: 1000):
# ENTRYCOUNT-BLOCKFREQ:<BB1> (Frequency: 133):
# ENTRYCOUNT-BLOCKFREQ:<BB2> (Frequency: 18):
# ENTRYCOUNT-BLOCKFREQ:<BB5> (Frequency: 1000):
# ENTRYCOUNT-BLOCKFREQ-PRETTY:<foo>:
-# ENTRYCOUNT-BLOCKFREQ-PRETTY:<BB3> (Entry count: 1000, Frequency: 1.0):
+# ENTRYCOUNT-BLOCKFREQ-PRETTY:<BB3> (Entry count: 1000, Dynamic instruction count: 2000, Frequency: 1.0):
# ENTRYCOUNT-BLOCKFREQ-PRETTY:<BB1> (Frequency: 0.133):
# ENTRYCOUNT-BLOCKFREQ-PRETTY:<BB2> (Frequency: 0.018):
# ENTRYCOUNT-BLOCKFREQ-PRETTY:<BB5> (Frequency: 1.0):
@@ -115,7 +156,7 @@ Symbols:
## Check the case where we have entry points, block frequency, and branch
## proabability information.
-# RUN: yaml2obj %s --docnum=3 -o %t3
+# RUN: yaml2obj %s --docnum=4 -o %t3
# RUN: llvm-objdump %t3 -d --symbolize-operands --no-show-raw-insn --no-leading-addr | \
# RUN: FileCheck --match-full-lines --strict-whitespace %s --check-prefix=ENTRY-FREQ-PROB
# RUN: llvm-objdump %t3 -d --symbolize-operands --pretty-pgo-analysis-map --no-show-raw-insn --no-leading-addr | \
diff --git a/llvm/test/tools/llvm-readobj/ELF/bb-addr-map-pgo-analysis-map.test b/llvm/test/tools/llvm-readobj/ELF/bb-addr-map-pgo-analysis-map.test
index 5faafd4d83b2f2..748e0fb3dbeb50 100644
--- a/llvm/test/tools/llvm-readobj/ELF/bb-addr-map-pgo-analysis-map.test
+++ b/llvm/test/tools/llvm-readobj/ELF/bb-addr-map-pgo-analysis-map.test
@@ -15,7 +15,7 @@
## Check that a malformed section can be handled.
# RUN: yaml2obj %s -DBITS=32 -DSIZE=24 -o %t2.o
-# RUN: llvm-readobj %t2.o --bb-addr-map 2>&1 | FileCheck --match-full-lines %s -DOFFSET=0x00000018 -DFILE=%t2.o --check-prefix=TRUNCATED
+# RUN: llvm-readobj %t2.o --bb-addr-map 2>&1 | FileCheck --match-full-lines %s -DOFFSET=0x00000014 -DFILE=%t2.o --check-prefix=TRUNCATED
## Check that missing features can be handled.
# RUN: yaml2obj %s -DBITS=32 -DFEATURE=0x2 -o %t3.o
@@ -55,6 +55,7 @@
# CHECK-NEXT: ]
# CHECK-NEXT: PGO analyses {
# CHECK-NEXT: FuncEntryCount: 100
+# CHECK-NEXT: DynamicInstCount: 100
# CHECK-NEXT: PGO BB entries [
# CHECK-NEXT: {
# RAW-NEXT: Frequency: 100
@@ -98,6 +99,7 @@
# CHECK-NEXT: ]
# CHECK-NEXT: PGO analyses {
# CHECK-NEXT: FuncEntryCount: 8888
+# CHECK-NEXT: DynamicInstCount: 8888
# CHECK-NEXT: PGO BB entries [
# CHECK-NEXT: {
# RAW-NEXT: Frequency: 9000
@@ -173,7 +175,7 @@ Sections:
Link: .text
Entries:
- Version: 2
- Feature: 0x7
+ Feature: 0x27
BBRanges:
- BaseAddress: [[ADDR=0x11111]]
BBEntries:
@@ -186,7 +188,7 @@ Sections:
Size: 0x4
Metadata: 0x15
- Version: 2
- Feature: 0x3
+ Feature: 0x23
BBRanges:
- BaseAddress: 0x22222
BBEntries:
@@ -196,6 +198,7 @@ Sections:
Metadata: 0x8
PGOAnalyses:
- FuncEntryCount: 100
+ DynamicInstCount: 100
PGOBBEntries:
- BBFreq: 100
Successors:
@@ -204,6 +207,7 @@ Sections:
- BBFreq: 100
Successors: []
- FuncEntryCount: 8888
+ DynamicInstCount: 8888
PGOBBEntries:
- BBFreq: 9000
- Name: dummy_section
@@ -237,4 +241,3 @@ Symbols:
Section: .text.bar
Type: STT_FUNC
Value: 0x33333
-
diff --git a/llvm/test/tools/obj2yaml/ELF/bb-addr-map-pgo-analysis-map.yaml b/llvm/test/tools/obj2yaml/ELF/bb-addr-map-pgo-analysis-map.yaml
index 299bf463cf4bc9..37edd968893b3a 100644
--- a/llvm/test/tools/obj2yaml/ELF/bb-addr-map-pgo-analysis-map.yaml
+++ b/llvm/test/tools/obj2yaml/ELF/bb-addr-map-pgo-analysis-map.yaml
@@ -15,7 +15,7 @@
# VALID-NEXT: Type: SHT_LLVM_BB_ADDR_MAP
# VALID-NEXT: Entries:
# VALID-NEXT: - Version: 2
-# VALID-NEXT: Feature: 0x7
+# VALID-NEXT: Feature: 0x27
## The 'BaseAddress' field is omitted when it's zero.
# VALID-NEXT: BBRanges:
# VALID-NEXT: - BBEntries:
@@ -42,6 +42,7 @@
# VALID-NEXT: Metadata: 0xC
# VALID-NEXT: PGOAnalyses:
# VALID-NEXT: - FuncEntryCount: 100
+# VALID-NEXT: DynamicInstCount: 200
# VALID-NEXT: PGOBBEntries:
# VALID-NEXT: - BBFreq: 100
# VALID-NEXT: Successors:
@@ -69,7 +70,7 @@ Sections:
ShSize: [[SIZE=<none>]]
Entries:
- Version: 2
- Feature: 0x7
+ Feature: 0x27
BBRanges:
- BaseAddress: 0x0
BBEntries:
@@ -96,6 +97,7 @@ Sections:
Metadata: 0xC
PGOAnalyses:
- FuncEntryCount: 100
+ DynamicInstCount: 200
PGOBBEntries:
- BBFreq: 100
Successors:
@@ -229,4 +231,3 @@ Sections:
# MISSING-FEC-NEXT: - Name: .llvm_bb_addr_map
# MISSING-FEC-NEXT: Type: SHT_LLV...
[truncated]
|
@llvm/pr-subscribers-backend-x86 Author: Lei Wang (wlei-llvm) ChangesAdd a new Patch is 28.73 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/119303.diff 15 Files Affected:
diff --git a/llvm/include/llvm/Object/ELFTypes.h b/llvm/include/llvm/Object/ELFTypes.h
index 87e4dbe4480910..5a80a05431d823 100644
--- a/llvm/include/llvm/Object/ELFTypes.h
+++ b/llvm/include/llvm/Object/ELFTypes.h
@@ -831,8 +831,11 @@ struct BBAddrMap {
bool BrProb : 1;
bool MultiBBRange : 1;
bool OmitBBEntries : 1;
+ bool DynamicInstCount : 1;
- bool hasPGOAnalysis() const { return FuncEntryCount || BBFreq || BrProb; }
+ bool hasPGOAnalysis() const {
+ return FuncEntryCount || BBFreq || BrProb || DynamicInstCount;
+ }
bool hasPGOAnalysisBBData() const { return BBFreq || BrProb; }
@@ -842,7 +845,8 @@ struct BBAddrMap {
(static_cast<uint8_t>(BBFreq) << 1) |
(static_cast<uint8_t>(BrProb) << 2) |
(static_cast<uint8_t>(MultiBBRange) << 3) |
- (static_cast<uint8_t>(OmitBBEntries) << 4);
+ (static_cast<uint8_t>(OmitBBEntries) << 4) |
+ (static_cast<uint8_t>(DynamicInstCount) << 5);
}
// Decodes from minimum bit width representation and validates no
@@ -851,7 +855,7 @@ struct BBAddrMap {
Features Feat{
static_cast<bool>(Val & (1 << 0)), static_cast<bool>(Val & (1 << 1)),
static_cast<bool>(Val & (1 << 2)), static_cast<bool>(Val & (1 << 3)),
- static_cast<bool>(Val & (1 << 4))};
+ static_cast<bool>(Val & (1 << 4)), static_cast<bool>(Val & (1 << 5))};
if (Feat.encode() != Val)
return createStringError(
std::error_code(), "invalid encoding for BBAddrMap::Features: 0x%x",
@@ -861,9 +865,10 @@ struct BBAddrMap {
bool operator==(const Features &Other) const {
return std::tie(FuncEntryCount, BBFreq, BrProb, MultiBBRange,
- OmitBBEntries) ==
+ OmitBBEntries, DynamicInstCount) ==
std::tie(Other.FuncEntryCount, Other.BBFreq, Other.BrProb,
- Other.MultiBBRange, Other.OmitBBEntries);
+ Other.MultiBBRange, Other.OmitBBEntries,
+ Other.DynamicInstCount);
}
};
@@ -1016,14 +1021,16 @@ struct PGOAnalysisMap {
};
uint64_t FuncEntryCount; // Prof count from IR function
+ uint64_t DynamicInstCount; // Dynamic instruction count
std::vector<PGOBBEntry> BBEntries; // Extended basic block entries
// Flags to indicate if each PGO related info was enabled in this function
BBAddrMap::Features FeatEnable;
bool operator==(const PGOAnalysisMap &Other) const {
- return std::tie(FuncEntryCount, BBEntries, FeatEnable) ==
- std::tie(Other.FuncEntryCount, Other.BBEntries, Other.FeatEnable);
+ return std::tie(FuncEntryCount, DynamicInstCount, BBEntries, FeatEnable) ==
+ std::tie(Other.FuncEntryCount, Other.DynamicInstCount,
+ Other.BBEntries, Other.FeatEnable);
}
};
diff --git a/llvm/include/llvm/ObjectYAML/ELFYAML.h b/llvm/include/llvm/ObjectYAML/ELFYAML.h
index dfdfa055d65fa6..17e9e56f8542eb 100644
--- a/llvm/include/llvm/ObjectYAML/ELFYAML.h
+++ b/llvm/include/llvm/ObjectYAML/ELFYAML.h
@@ -192,6 +192,7 @@ struct PGOAnalysisMapEntry {
std::optional<std::vector<SuccessorEntry>> Successors;
};
std::optional<uint64_t> FuncEntryCount;
+ std::optional<uint64_t> DynamicInstCount;
std::optional<std::vector<PGOBBEntry>> PGOBBEntries;
};
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 3072edc5088e2a..a3f46d3edd5099 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -143,6 +143,7 @@ using namespace llvm;
enum class PGOMapFeaturesEnum {
None,
FuncEntryCount,
+ DynInstCount,
BBFreq,
BrProb,
All,
@@ -153,6 +154,8 @@ static cl::bits<PGOMapFeaturesEnum> PgoAnalysisMapFeatures(
clEnumValN(PGOMapFeaturesEnum::None, "none", "Disable all options"),
clEnumValN(PGOMapFeaturesEnum::FuncEntryCount, "func-entry-count",
"Function Entry Count"),
+ clEnumValN(PGOMapFeaturesEnum::DynInstCount, "dyn-inst-count",
+ "Dynamic instruction count"),
clEnumValN(PGOMapFeaturesEnum::BBFreq, "bb-freq",
"Basic Block Frequency"),
clEnumValN(PGOMapFeaturesEnum::BrProb, "br-prob", "Branch Probability"),
@@ -1412,6 +1415,9 @@ getBBAddrMapFeature(const MachineFunction &MF, int NumMBBSectionRanges) {
bool FuncEntryCountEnabled =
AllFeatures || (!NoFeatures && PgoAnalysisMapFeatures.isSet(
PGOMapFeaturesEnum::FuncEntryCount));
+ bool DynInstCountEnabled =
+ AllFeatures || (!NoFeatures && PgoAnalysisMapFeatures.isSet(
+ PGOMapFeaturesEnum::DynInstCount));
bool BBFreqEnabled =
AllFeatures ||
(!NoFeatures && PgoAnalysisMapFeatures.isSet(PGOMapFeaturesEnum::BBFreq));
@@ -1424,9 +1430,12 @@ getBBAddrMapFeature(const MachineFunction &MF, int NumMBBSectionRanges) {
"BB entries info is required for BBFreq and BrProb "
"features");
}
- return {FuncEntryCountEnabled, BBFreqEnabled, BrProbEnabled,
+ return {FuncEntryCountEnabled,
+ BBFreqEnabled,
+ BrProbEnabled,
MF.hasBBSections() && NumMBBSectionRanges > 1,
- static_cast<bool>(BBAddrMapSkipEmitBBEntries)};
+ static_cast<bool>(BBAddrMapSkipEmitBBEntries),
+ DynInstCountEnabled};
}
void AsmPrinter::emitBBAddrMapSection(const MachineFunction &MF) {
@@ -1519,7 +1528,7 @@ void AsmPrinter::emitBBAddrMapSection(const MachineFunction &MF) {
MaybeEntryCount ? MaybeEntryCount->getCount() : 0);
}
const MachineBlockFrequencyInfo *MBFI =
- Features.BBFreq
+ Features.BBFreq || Features.DynamicInstCount
? &getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI()
: nullptr;
const MachineBranchProbabilityInfo *MBPI =
@@ -1527,8 +1536,13 @@ void AsmPrinter::emitBBAddrMapSection(const MachineFunction &MF) {
? &getAnalysis<MachineBranchProbabilityInfoWrapperPass>().getMBPI()
: nullptr;
- if (Features.BBFreq || Features.BrProb) {
+ if (Features.BBFreq || Features.BrProb || Features.DynamicInstCount) {
+ uint64_t DynInstCount = 0;
for (const MachineBasicBlock &MBB : MF) {
+ if (Features.DynamicInstCount) {
+ DynInstCount +=
+ MBFI->getBlockProfileCount(&MBB).value_or(0) * MBB.size();
+ }
if (Features.BBFreq) {
OutStreamer->AddComment("basic block frequency");
OutStreamer->emitULEB128IntValue(
@@ -1547,6 +1561,10 @@ void AsmPrinter::emitBBAddrMapSection(const MachineFunction &MF) {
}
}
}
+ if (Features.DynamicInstCount) {
+ OutStreamer->AddComment("Dynamic instruction count");
+ OutStreamer->emitULEB128IntValue(DynInstCount);
+ }
}
}
diff --git a/llvm/lib/Object/ELF.cpp b/llvm/lib/Object/ELF.cpp
index b6d0699ee4fe08..549d1de2a83f7c 100644
--- a/llvm/lib/Object/ELF.cpp
+++ b/llvm/lib/Object/ELF.cpp
@@ -887,6 +887,12 @@ decodeBBAddrMapImpl(const ELFFile<ELFT> &EF,
? readULEB128As<uint64_t>(Data, Cur, ULEBSizeErr)
: 0;
+ // Dynamic instruction count
+ uint64_t DynInstCount =
+ FeatEnable.DynamicInstCount
+ ? readULEB128As<uint64_t>(Data, Cur, ULEBSizeErr)
+ : 0;
+
std::vector<PGOAnalysisMap::PGOBBEntry> PGOBBEntries;
for (uint32_t BlockIndex = 0;
FeatEnable.hasPGOAnalysisBBData() && !MetadataDecodeErr &&
@@ -915,8 +921,8 @@ decodeBBAddrMapImpl(const ELFFile<ELFT> &EF,
}
if (PGOAnalyses)
- PGOAnalyses->push_back(
- {FuncEntryCount, std::move(PGOBBEntries), FeatEnable});
+ PGOAnalyses->push_back({FuncEntryCount, DynInstCount,
+ std::move(PGOBBEntries), FeatEnable});
}
}
// Either Cur is in the error state, or we have an error in ULEBSizeErr or
diff --git a/llvm/lib/ObjectYAML/ELFEmitter.cpp b/llvm/lib/ObjectYAML/ELFEmitter.cpp
index 5daf6c32ec936a..5f8675c0e6b107 100644
--- a/llvm/lib/ObjectYAML/ELFEmitter.cpp
+++ b/llvm/lib/ObjectYAML/ELFEmitter.cpp
@@ -1515,6 +1515,9 @@ void ELFState<ELFT>::writeSectionContent(
if (PGOEntry.FuncEntryCount)
SHeader.sh_size += CBA.writeULEB128(*PGOEntry.FuncEntryCount);
+ if (PGOEntry.DynamicInstCount)
+ SHeader.sh_size += CBA.writeULEB128(*PGOEntry.DynamicInstCount);
+
if (!PGOEntry.PGOBBEntries)
continue;
diff --git a/llvm/lib/ObjectYAML/ELFYAML.cpp b/llvm/lib/ObjectYAML/ELFYAML.cpp
index ca0ea03452d3be..424cbd45f0e01e 100644
--- a/llvm/lib/ObjectYAML/ELFYAML.cpp
+++ b/llvm/lib/ObjectYAML/ELFYAML.cpp
@@ -1872,6 +1872,7 @@ void MappingTraits<ELFYAML::PGOAnalysisMapEntry>::mapping(
IO &IO, ELFYAML::PGOAnalysisMapEntry &E) {
assert(IO.getContext() && "The IO context is not initialized");
IO.mapOptional("FuncEntryCount", E.FuncEntryCount);
+ IO.mapOptional("DynamicInstCount", E.DynamicInstCount);
IO.mapOptional("PGOBBEntries", E.PGOBBEntries);
}
diff --git a/llvm/test/CodeGen/X86/basic-block-address-map-pgo-features.ll b/llvm/test/CodeGen/X86/basic-block-address-map-pgo-features.ll
index 63779727ec72c6..0e49427ec3efe8 100644
--- a/llvm/test/CodeGen/X86/basic-block-address-map-pgo-features.ll
+++ b/llvm/test/CodeGen/X86/basic-block-address-map-pgo-features.ll
@@ -3,13 +3,14 @@
; RUN: llc < %s -mtriple=x86_64 -function-sections -unique-section-names=true -basic-block-address-map -pgo-analysis-map=none | FileCheck %s --check-prefixes=CHECK,BASIC,PGO-NONE
;; Also verify this holds for all PGO features enabled
-; RUN: llc < %s -mtriple=x86_64 -function-sections -unique-section-names=true -basic-block-address-map -pgo-analysis-map=func-entry-count,bb-freq,br-prob | FileCheck %s --check-prefixes=CHECK,PGO-ALL,PGO-FEC,PGO-BBF,PGO-BRP
-; RUN: llc < %s -mtriple=x86_64 -function-sections -unique-section-names=true -basic-block-address-map -pgo-analysis-map=all | FileCheck %s --check-prefixes=CHECK,PGO-ALL,PGO-FEC,PGO-BBF,PGO-BRP
+; RUN: llc < %s -mtriple=x86_64 -function-sections -unique-section-names=true -basic-block-address-map -pgo-analysis-map=func-entry-count,bb-freq,br-prob,dyn-inst-count | FileCheck %s --check-prefixes=CHECK,PGO-ALL,PGO-FEC,PGO-BBF,PGO-BRP
+; RUN: llc < %s -mtriple=x86_64 -function-sections -unique-section-names=true -basic-block-address-map -pgo-analysis-map=all | FileCheck %s --check-prefixes=CHECK,PGO-ALL,PGO-FEC,PGO-BBF,PGO-BRP,PGO-DINST
;; Also verify that pgo extension only includes the enabled feature
; RUN: llc < %s -mtriple=x86_64 -function-sections -unique-section-names=true -basic-block-address-map -pgo-analysis-map=func-entry-count | FileCheck %s --check-prefixes=CHECK,PGO-FEC,FEC-ONLY
; RUN: llc < %s -mtriple=x86_64 -function-sections -unique-section-names=true -basic-block-address-map -pgo-analysis-map=bb-freq | FileCheck %s --check-prefixes=CHECK,PGO-BBF,BBF-ONLY
; RUN: llc < %s -mtriple=x86_64 -function-sections -unique-section-names=true -basic-block-address-map -pgo-analysis-map=br-prob | FileCheck %s --check-prefixes=CHECK,PGO-BRP,BRP-ONLY
+; RUN: llc < %s -mtriple=x86_64 -function-sections -unique-section-names=true -basic-block-address-map -pgo-analysis-map=dyn-inst-count | FileCheck %s --check-prefixes=CHECK,PGO-DINST,DINST-ONLY
; RUN: llc < %s -mtriple=x86_64 -function-sections -unique-section-names=true -basic-block-address-map -pgo-analysis-map=func-entry-count -basic-block-address-map-skip-bb-entries | FileCheck %s --check-prefixes=SKIP-BB-ENTRIES
; RUN: not llc < %s -mtriple=x86_64 -function-sections -unique-section-names=true -basic-block-address-map -pgo-analysis-map=bb-freq -basic-block-address-map-skip-bb-entries 2>&1 | FileCheck %s --check-prefixes=SKIP-BB-ENTRIES-ERROR
@@ -71,8 +72,9 @@ declare i32 @__gxx_personality_v0(...)
; CHECK: .section .llvm_bb_addr_map,"o",@llvm_bb_addr_map,.text._Z3bazb{{$}}
; CHECK-NEXT: .byte 2 # version
; BASIC-NEXT: .byte 0 # feature
-; PGO-ALL-NEXT: .byte 7 # feature
+; PGO-ALL-NEXT: .byte 39 # feature
; FEC-ONLY-NEXT:.byte 1 # feature
+; DINST-ONLY-NEXT:.byte 32 # feature
; BBF-ONLY-NEXT:.byte 2 # feature
; BRP-ONLY-NEXT:.byte 4 # feature
; CHECK-NEXT: .quad .Lfunc_begin0 # function address
@@ -138,6 +140,8 @@ declare i32 @__gxx_personality_v0(...)
; PGO-BRP-NEXT: .byte 5 # successor BB ID
; PGO-BRP-NEXT: .ascii "\200\200\200\200\b" # successor branch probability
+; PGO-DINST: .ascii "\341\f" # Dynamic instruction count
+
; SKIP-BB-ENTRIES: .byte 17 # feature
; SKIP-BB-ENTRIES-NEXT: .quad .Lfunc_begin0 # function address
; SKIP-BB-ENTRIES-NEXT: .byte 6 # number of basic blocks
diff --git a/llvm/test/tools/llvm-objdump/X86/elf-pgoanalysismap.yaml b/llvm/test/tools/llvm-objdump/X86/elf-pgoanalysismap.yaml
index 4d1e5408d86d4d..26466c8ff48ffa 100644
--- a/llvm/test/tools/llvm-objdump/X86/elf-pgoanalysismap.yaml
+++ b/llvm/test/tools/llvm-objdump/X86/elf-pgoanalysismap.yaml
@@ -43,9 +43,49 @@ Symbols:
# ENTRYCOUNT: <foo>:
# ENTRYCOUNT: <BB3> (Entry count: 1000):
+## Check the case where we only have dynamic instruction count.
+# RUN: yaml2obj --docnum=2 %s -o %t1
+# RUN: llvm-objdump %t1 -d --symbolize-operands --no-show-raw-insn --no-leading-addr | \
+# RUN: FileCheck %s --check-prefix=DYNICNT
+
+--- !ELF
+FileHeader:
+ Class: ELFCLASS64
+ Data: ELFDATA2LSB
+ Type: ET_EXEC
+ Machine: EM_X86_64
+Sections:
+ - Name: .text.foo
+ Type: SHT_PROGBITS
+ Address: 0x0
+ Flags: [SHF_ALLOC, SHF_EXECINSTR]
+ Content: '50'
+ - Name: .llvm_bb_addr_map.foo
+ Type: SHT_LLVM_BB_ADDR_MAP
+ Link: .text.foo
+ Entries:
+ - Version: 2
+ Feature: 0x20
+ BBRanges:
+ - BaseAddress: 0x0
+ BBEntries:
+ - ID: 3
+ AddressOffset: 0x0
+ Size: 0x1
+ Metadata: 0x1
+ PGOAnalyses:
+ - DynamicInstCount: 1000
+Symbols:
+ - Name: foo
+ Section: .text.foo
+ Value: 0x0
+
+# DYNICNT: <foo>:
+# DYNICNT: <BB3> (Dynamic instruction count: 1000):
+
## Check the case where we have entry points and block frequency information
-# RUN: yaml2obj %s --docnum=2 -o %t2
+# RUN: yaml2obj %s --docnum=3 -o %t2
# RUN: llvm-objdump %t2 -d --symbolize-operands --no-show-raw-insn --no-leading-addr | \
# RUN: FileCheck --match-full-lines --strict-whitespace %s --check-prefix=ENTRYCOUNT-BLOCKFREQ
# RUN: llvm-objdump %t2 -d --symbolize-operands --pretty-pgo-analysis-map --no-show-raw-insn --no-leading-addr | \
@@ -68,7 +108,7 @@ Sections:
Link: .text.foo
Entries:
- Version: 2
- Feature: 0x3
+ Feature: 0x23
BBRanges:
- BaseAddress: 0x0
BBEntries:
@@ -90,6 +130,7 @@ Sections:
Metadata: 0x2
PGOAnalyses:
- FuncEntryCount: 1000
+ DynamicInstCount: 2000
PGOBBEntries:
- BBFreq: 1000
- BBFreq: 133
@@ -101,13 +142,13 @@ Symbols:
Value: 0x0
# ENTRYCOUNT-BLOCKFREQ:<foo>:
-# ENTRYCOUNT-BLOCKFREQ:<BB3> (Entry count: 1000, Frequency: 1000):
+# ENTRYCOUNT-BLOCKFREQ:<BB3> (Entry count: 1000, Dynamic instruction count: 2000, Frequency: 1000):
# ENTRYCOUNT-BLOCKFREQ:<BB1> (Frequency: 133):
# ENTRYCOUNT-BLOCKFREQ:<BB2> (Frequency: 18):
# ENTRYCOUNT-BLOCKFREQ:<BB5> (Frequency: 1000):
# ENTRYCOUNT-BLOCKFREQ-PRETTY:<foo>:
-# ENTRYCOUNT-BLOCKFREQ-PRETTY:<BB3> (Entry count: 1000, Frequency: 1.0):
+# ENTRYCOUNT-BLOCKFREQ-PRETTY:<BB3> (Entry count: 1000, Dynamic instruction count: 2000, Frequency: 1.0):
# ENTRYCOUNT-BLOCKFREQ-PRETTY:<BB1> (Frequency: 0.133):
# ENTRYCOUNT-BLOCKFREQ-PRETTY:<BB2> (Frequency: 0.018):
# ENTRYCOUNT-BLOCKFREQ-PRETTY:<BB5> (Frequency: 1.0):
@@ -115,7 +156,7 @@ Symbols:
## Check the case where we have entry points, block frequency, and branch
## proabability information.
-# RUN: yaml2obj %s --docnum=3 -o %t3
+# RUN: yaml2obj %s --docnum=4 -o %t3
# RUN: llvm-objdump %t3 -d --symbolize-operands --no-show-raw-insn --no-leading-addr | \
# RUN: FileCheck --match-full-lines --strict-whitespace %s --check-prefix=ENTRY-FREQ-PROB
# RUN: llvm-objdump %t3 -d --symbolize-operands --pretty-pgo-analysis-map --no-show-raw-insn --no-leading-addr | \
diff --git a/llvm/test/tools/llvm-readobj/ELF/bb-addr-map-pgo-analysis-map.test b/llvm/test/tools/llvm-readobj/ELF/bb-addr-map-pgo-analysis-map.test
index 5faafd4d83b2f2..748e0fb3dbeb50 100644
--- a/llvm/test/tools/llvm-readobj/ELF/bb-addr-map-pgo-analysis-map.test
+++ b/llvm/test/tools/llvm-readobj/ELF/bb-addr-map-pgo-analysis-map.test
@@ -15,7 +15,7 @@
## Check that a malformed section can be handled.
# RUN: yaml2obj %s -DBITS=32 -DSIZE=24 -o %t2.o
-# RUN: llvm-readobj %t2.o --bb-addr-map 2>&1 | FileCheck --match-full-lines %s -DOFFSET=0x00000018 -DFILE=%t2.o --check-prefix=TRUNCATED
+# RUN: llvm-readobj %t2.o --bb-addr-map 2>&1 | FileCheck --match-full-lines %s -DOFFSET=0x00000014 -DFILE=%t2.o --check-prefix=TRUNCATED
## Check that missing features can be handled.
# RUN: yaml2obj %s -DBITS=32 -DFEATURE=0x2 -o %t3.o
@@ -55,6 +55,7 @@
# CHECK-NEXT: ]
# CHECK-NEXT: PGO analyses {
# CHECK-NEXT: FuncEntryCount: 100
+# CHECK-NEXT: DynamicInstCount: 100
# CHECK-NEXT: PGO BB entries [
# CHECK-NEXT: {
# RAW-NEXT: Frequency: 100
@@ -98,6 +99,7 @@
# CHECK-NEXT: ]
# CHECK-NEXT: PGO analyses {
# CHECK-NEXT: FuncEntryCount: 8888
+# CHECK-NEXT: DynamicInstCount: 8888
# CHECK-NEXT: PGO BB entries [
# CHECK-NEXT: {
# RAW-NEXT: Frequency: 9000
@@ -173,7 +175,7 @@ Sections:
Link: .text
Entries:
- Version: 2
- Feature: 0x7
+ Feature: 0x27
BBRanges:
- BaseAddress: [[ADDR=0x11111]]
BBEntries:
@@ -186,7 +188,7 @@ Sections:
Size: 0x4
Metadata: 0x15
- Version: 2
- Feature: 0x3
+ Feature: 0x23
BBRanges:
- BaseAddress: 0x22222
BBEntries:
@@ -196,6 +198,7 @@ Sections:
Metadata: 0x8
PGOAnalyses:
- FuncEntryCount: 100
+ DynamicInstCount: 100
PGOBBEntries:
- BBFreq: 100
Successors:
@@ -204,6 +207,7 @@ Sections:
- BBFreq: 100
Successors: []
- FuncEntryCount: 8888
+ DynamicInstCount: 8888
PGOBBEntries:
- BBFreq: 9000
- Name: dummy_section
@@ -237,4 +241,3 @@ Symbols:
Section: .text.bar
Type: STT_FUNC
Value: 0x33333
-
diff --git a/llvm/test/tools/obj2yaml/ELF/bb-addr-map-pgo-analysis-map.yaml b/llvm/test/tools/obj2yaml/ELF/bb-addr-map-pgo-analysis-map.yaml
index 299bf463cf4bc9..37edd968893b3a 100644
--- a/llvm/test/tools/obj2yaml/ELF/bb-addr-map-pgo-analysis-map.yaml
+++ b/llvm/test/tools/obj2yaml/ELF/bb-addr-map-pgo-analysis-map.yaml
@@ -15,7 +15,7 @@
# VALID-NEXT: Type: SHT_LLVM_BB_ADDR_MAP
# VALID-NEXT: Entries:
# VALID-NEXT: - Version: 2
-# VALID-NEXT: Feature: 0x7
+# VALID-NEXT: Feature: 0x27
## The 'BaseAddress' field is omitted when it's zero.
# VALID-NEXT: BBRanges:
# VALID-NEXT: - BBEntries:
@@ -42,6 +42,7 @@
# VALID-NEXT: Metadata: 0xC
# VALID-NEXT: PGOAnalyses:
# VALID-NEXT: - FuncEntryCount: 100
+# VALID-NEXT: DynamicInstCount: 200
# VALID-NEXT: PGOBBEntries:
# VALID-NEXT: - BBFreq: 100
# VALID-NEXT: Successors:
@@ -69,7 +70,7 @@ Sections:
ShSize: [[SIZE=<none>]]
Entries:
- Version: 2
- Feature: 0x7
+ Feature: 0x27
BBRanges:
- BaseAddress: 0x0
BBEntries:
@@ -96,6 +97,7 @@ Sections:
Metadata: 0xC
PGOAnalyses:
- FuncEntryCount: 100
+ DynamicInstCount: 200
PGOBBEntries:
- BBFreq: 100
Successors:
@@ -229,4 +231,3 @@ Sections:
# MISSING-FEC-NEXT: - Name: .llvm_bb_addr_map
# MISSING-FEC-NEXT: Type: SHT_LLV...
[truncated]
|
@llvm/pr-subscribers-objectyaml Author: Lei Wang (wlei-llvm) ChangesAdd a new Patch is 28.73 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/119303.diff 15 Files Affected:
diff --git a/llvm/include/llvm/Object/ELFTypes.h b/llvm/include/llvm/Object/ELFTypes.h
index 87e4dbe4480910..5a80a05431d823 100644
--- a/llvm/include/llvm/Object/ELFTypes.h
+++ b/llvm/include/llvm/Object/ELFTypes.h
@@ -831,8 +831,11 @@ struct BBAddrMap {
bool BrProb : 1;
bool MultiBBRange : 1;
bool OmitBBEntries : 1;
+ bool DynamicInstCount : 1;
- bool hasPGOAnalysis() const { return FuncEntryCount || BBFreq || BrProb; }
+ bool hasPGOAnalysis() const {
+ return FuncEntryCount || BBFreq || BrProb || DynamicInstCount;
+ }
bool hasPGOAnalysisBBData() const { return BBFreq || BrProb; }
@@ -842,7 +845,8 @@ struct BBAddrMap {
(static_cast<uint8_t>(BBFreq) << 1) |
(static_cast<uint8_t>(BrProb) << 2) |
(static_cast<uint8_t>(MultiBBRange) << 3) |
- (static_cast<uint8_t>(OmitBBEntries) << 4);
+ (static_cast<uint8_t>(OmitBBEntries) << 4) |
+ (static_cast<uint8_t>(DynamicInstCount) << 5);
}
// Decodes from minimum bit width representation and validates no
@@ -851,7 +855,7 @@ struct BBAddrMap {
Features Feat{
static_cast<bool>(Val & (1 << 0)), static_cast<bool>(Val & (1 << 1)),
static_cast<bool>(Val & (1 << 2)), static_cast<bool>(Val & (1 << 3)),
- static_cast<bool>(Val & (1 << 4))};
+ static_cast<bool>(Val & (1 << 4)), static_cast<bool>(Val & (1 << 5))};
if (Feat.encode() != Val)
return createStringError(
std::error_code(), "invalid encoding for BBAddrMap::Features: 0x%x",
@@ -861,9 +865,10 @@ struct BBAddrMap {
bool operator==(const Features &Other) const {
return std::tie(FuncEntryCount, BBFreq, BrProb, MultiBBRange,
- OmitBBEntries) ==
+ OmitBBEntries, DynamicInstCount) ==
std::tie(Other.FuncEntryCount, Other.BBFreq, Other.BrProb,
- Other.MultiBBRange, Other.OmitBBEntries);
+ Other.MultiBBRange, Other.OmitBBEntries,
+ Other.DynamicInstCount);
}
};
@@ -1016,14 +1021,16 @@ struct PGOAnalysisMap {
};
uint64_t FuncEntryCount; // Prof count from IR function
+ uint64_t DynamicInstCount; // Dynamic instruction count
std::vector<PGOBBEntry> BBEntries; // Extended basic block entries
// Flags to indicate if each PGO related info was enabled in this function
BBAddrMap::Features FeatEnable;
bool operator==(const PGOAnalysisMap &Other) const {
- return std::tie(FuncEntryCount, BBEntries, FeatEnable) ==
- std::tie(Other.FuncEntryCount, Other.BBEntries, Other.FeatEnable);
+ return std::tie(FuncEntryCount, DynamicInstCount, BBEntries, FeatEnable) ==
+ std::tie(Other.FuncEntryCount, Other.DynamicInstCount,
+ Other.BBEntries, Other.FeatEnable);
}
};
diff --git a/llvm/include/llvm/ObjectYAML/ELFYAML.h b/llvm/include/llvm/ObjectYAML/ELFYAML.h
index dfdfa055d65fa6..17e9e56f8542eb 100644
--- a/llvm/include/llvm/ObjectYAML/ELFYAML.h
+++ b/llvm/include/llvm/ObjectYAML/ELFYAML.h
@@ -192,6 +192,7 @@ struct PGOAnalysisMapEntry {
std::optional<std::vector<SuccessorEntry>> Successors;
};
std::optional<uint64_t> FuncEntryCount;
+ std::optional<uint64_t> DynamicInstCount;
std::optional<std::vector<PGOBBEntry>> PGOBBEntries;
};
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 3072edc5088e2a..a3f46d3edd5099 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -143,6 +143,7 @@ using namespace llvm;
enum class PGOMapFeaturesEnum {
None,
FuncEntryCount,
+ DynInstCount,
BBFreq,
BrProb,
All,
@@ -153,6 +154,8 @@ static cl::bits<PGOMapFeaturesEnum> PgoAnalysisMapFeatures(
clEnumValN(PGOMapFeaturesEnum::None, "none", "Disable all options"),
clEnumValN(PGOMapFeaturesEnum::FuncEntryCount, "func-entry-count",
"Function Entry Count"),
+ clEnumValN(PGOMapFeaturesEnum::DynInstCount, "dyn-inst-count",
+ "Dynamic instruction count"),
clEnumValN(PGOMapFeaturesEnum::BBFreq, "bb-freq",
"Basic Block Frequency"),
clEnumValN(PGOMapFeaturesEnum::BrProb, "br-prob", "Branch Probability"),
@@ -1412,6 +1415,9 @@ getBBAddrMapFeature(const MachineFunction &MF, int NumMBBSectionRanges) {
bool FuncEntryCountEnabled =
AllFeatures || (!NoFeatures && PgoAnalysisMapFeatures.isSet(
PGOMapFeaturesEnum::FuncEntryCount));
+ bool DynInstCountEnabled =
+ AllFeatures || (!NoFeatures && PgoAnalysisMapFeatures.isSet(
+ PGOMapFeaturesEnum::DynInstCount));
bool BBFreqEnabled =
AllFeatures ||
(!NoFeatures && PgoAnalysisMapFeatures.isSet(PGOMapFeaturesEnum::BBFreq));
@@ -1424,9 +1430,12 @@ getBBAddrMapFeature(const MachineFunction &MF, int NumMBBSectionRanges) {
"BB entries info is required for BBFreq and BrProb "
"features");
}
- return {FuncEntryCountEnabled, BBFreqEnabled, BrProbEnabled,
+ return {FuncEntryCountEnabled,
+ BBFreqEnabled,
+ BrProbEnabled,
MF.hasBBSections() && NumMBBSectionRanges > 1,
- static_cast<bool>(BBAddrMapSkipEmitBBEntries)};
+ static_cast<bool>(BBAddrMapSkipEmitBBEntries),
+ DynInstCountEnabled};
}
void AsmPrinter::emitBBAddrMapSection(const MachineFunction &MF) {
@@ -1519,7 +1528,7 @@ void AsmPrinter::emitBBAddrMapSection(const MachineFunction &MF) {
MaybeEntryCount ? MaybeEntryCount->getCount() : 0);
}
const MachineBlockFrequencyInfo *MBFI =
- Features.BBFreq
+ Features.BBFreq || Features.DynamicInstCount
? &getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI()
: nullptr;
const MachineBranchProbabilityInfo *MBPI =
@@ -1527,8 +1536,13 @@ void AsmPrinter::emitBBAddrMapSection(const MachineFunction &MF) {
? &getAnalysis<MachineBranchProbabilityInfoWrapperPass>().getMBPI()
: nullptr;
- if (Features.BBFreq || Features.BrProb) {
+ if (Features.BBFreq || Features.BrProb || Features.DynamicInstCount) {
+ uint64_t DynInstCount = 0;
for (const MachineBasicBlock &MBB : MF) {
+ if (Features.DynamicInstCount) {
+ DynInstCount +=
+ MBFI->getBlockProfileCount(&MBB).value_or(0) * MBB.size();
+ }
if (Features.BBFreq) {
OutStreamer->AddComment("basic block frequency");
OutStreamer->emitULEB128IntValue(
@@ -1547,6 +1561,10 @@ void AsmPrinter::emitBBAddrMapSection(const MachineFunction &MF) {
}
}
}
+ if (Features.DynamicInstCount) {
+ OutStreamer->AddComment("Dynamic instruction count");
+ OutStreamer->emitULEB128IntValue(DynInstCount);
+ }
}
}
diff --git a/llvm/lib/Object/ELF.cpp b/llvm/lib/Object/ELF.cpp
index b6d0699ee4fe08..549d1de2a83f7c 100644
--- a/llvm/lib/Object/ELF.cpp
+++ b/llvm/lib/Object/ELF.cpp
@@ -887,6 +887,12 @@ decodeBBAddrMapImpl(const ELFFile<ELFT> &EF,
? readULEB128As<uint64_t>(Data, Cur, ULEBSizeErr)
: 0;
+ // Dynamic instruction count
+ uint64_t DynInstCount =
+ FeatEnable.DynamicInstCount
+ ? readULEB128As<uint64_t>(Data, Cur, ULEBSizeErr)
+ : 0;
+
std::vector<PGOAnalysisMap::PGOBBEntry> PGOBBEntries;
for (uint32_t BlockIndex = 0;
FeatEnable.hasPGOAnalysisBBData() && !MetadataDecodeErr &&
@@ -915,8 +921,8 @@ decodeBBAddrMapImpl(const ELFFile<ELFT> &EF,
}
if (PGOAnalyses)
- PGOAnalyses->push_back(
- {FuncEntryCount, std::move(PGOBBEntries), FeatEnable});
+ PGOAnalyses->push_back({FuncEntryCount, DynInstCount,
+ std::move(PGOBBEntries), FeatEnable});
}
}
// Either Cur is in the error state, or we have an error in ULEBSizeErr or
diff --git a/llvm/lib/ObjectYAML/ELFEmitter.cpp b/llvm/lib/ObjectYAML/ELFEmitter.cpp
index 5daf6c32ec936a..5f8675c0e6b107 100644
--- a/llvm/lib/ObjectYAML/ELFEmitter.cpp
+++ b/llvm/lib/ObjectYAML/ELFEmitter.cpp
@@ -1515,6 +1515,9 @@ void ELFState<ELFT>::writeSectionContent(
if (PGOEntry.FuncEntryCount)
SHeader.sh_size += CBA.writeULEB128(*PGOEntry.FuncEntryCount);
+ if (PGOEntry.DynamicInstCount)
+ SHeader.sh_size += CBA.writeULEB128(*PGOEntry.DynamicInstCount);
+
if (!PGOEntry.PGOBBEntries)
continue;
diff --git a/llvm/lib/ObjectYAML/ELFYAML.cpp b/llvm/lib/ObjectYAML/ELFYAML.cpp
index ca0ea03452d3be..424cbd45f0e01e 100644
--- a/llvm/lib/ObjectYAML/ELFYAML.cpp
+++ b/llvm/lib/ObjectYAML/ELFYAML.cpp
@@ -1872,6 +1872,7 @@ void MappingTraits<ELFYAML::PGOAnalysisMapEntry>::mapping(
IO &IO, ELFYAML::PGOAnalysisMapEntry &E) {
assert(IO.getContext() && "The IO context is not initialized");
IO.mapOptional("FuncEntryCount", E.FuncEntryCount);
+ IO.mapOptional("DynamicInstCount", E.DynamicInstCount);
IO.mapOptional("PGOBBEntries", E.PGOBBEntries);
}
diff --git a/llvm/test/CodeGen/X86/basic-block-address-map-pgo-features.ll b/llvm/test/CodeGen/X86/basic-block-address-map-pgo-features.ll
index 63779727ec72c6..0e49427ec3efe8 100644
--- a/llvm/test/CodeGen/X86/basic-block-address-map-pgo-features.ll
+++ b/llvm/test/CodeGen/X86/basic-block-address-map-pgo-features.ll
@@ -3,13 +3,14 @@
; RUN: llc < %s -mtriple=x86_64 -function-sections -unique-section-names=true -basic-block-address-map -pgo-analysis-map=none | FileCheck %s --check-prefixes=CHECK,BASIC,PGO-NONE
;; Also verify this holds for all PGO features enabled
-; RUN: llc < %s -mtriple=x86_64 -function-sections -unique-section-names=true -basic-block-address-map -pgo-analysis-map=func-entry-count,bb-freq,br-prob | FileCheck %s --check-prefixes=CHECK,PGO-ALL,PGO-FEC,PGO-BBF,PGO-BRP
-; RUN: llc < %s -mtriple=x86_64 -function-sections -unique-section-names=true -basic-block-address-map -pgo-analysis-map=all | FileCheck %s --check-prefixes=CHECK,PGO-ALL,PGO-FEC,PGO-BBF,PGO-BRP
+; RUN: llc < %s -mtriple=x86_64 -function-sections -unique-section-names=true -basic-block-address-map -pgo-analysis-map=func-entry-count,bb-freq,br-prob,dyn-inst-count | FileCheck %s --check-prefixes=CHECK,PGO-ALL,PGO-FEC,PGO-BBF,PGO-BRP
+; RUN: llc < %s -mtriple=x86_64 -function-sections -unique-section-names=true -basic-block-address-map -pgo-analysis-map=all | FileCheck %s --check-prefixes=CHECK,PGO-ALL,PGO-FEC,PGO-BBF,PGO-BRP,PGO-DINST
;; Also verify that pgo extension only includes the enabled feature
; RUN: llc < %s -mtriple=x86_64 -function-sections -unique-section-names=true -basic-block-address-map -pgo-analysis-map=func-entry-count | FileCheck %s --check-prefixes=CHECK,PGO-FEC,FEC-ONLY
; RUN: llc < %s -mtriple=x86_64 -function-sections -unique-section-names=true -basic-block-address-map -pgo-analysis-map=bb-freq | FileCheck %s --check-prefixes=CHECK,PGO-BBF,BBF-ONLY
; RUN: llc < %s -mtriple=x86_64 -function-sections -unique-section-names=true -basic-block-address-map -pgo-analysis-map=br-prob | FileCheck %s --check-prefixes=CHECK,PGO-BRP,BRP-ONLY
+; RUN: llc < %s -mtriple=x86_64 -function-sections -unique-section-names=true -basic-block-address-map -pgo-analysis-map=dyn-inst-count | FileCheck %s --check-prefixes=CHECK,PGO-DINST,DINST-ONLY
; RUN: llc < %s -mtriple=x86_64 -function-sections -unique-section-names=true -basic-block-address-map -pgo-analysis-map=func-entry-count -basic-block-address-map-skip-bb-entries | FileCheck %s --check-prefixes=SKIP-BB-ENTRIES
; RUN: not llc < %s -mtriple=x86_64 -function-sections -unique-section-names=true -basic-block-address-map -pgo-analysis-map=bb-freq -basic-block-address-map-skip-bb-entries 2>&1 | FileCheck %s --check-prefixes=SKIP-BB-ENTRIES-ERROR
@@ -71,8 +72,9 @@ declare i32 @__gxx_personality_v0(...)
; CHECK: .section .llvm_bb_addr_map,"o",@llvm_bb_addr_map,.text._Z3bazb{{$}}
; CHECK-NEXT: .byte 2 # version
; BASIC-NEXT: .byte 0 # feature
-; PGO-ALL-NEXT: .byte 7 # feature
+; PGO-ALL-NEXT: .byte 39 # feature
; FEC-ONLY-NEXT:.byte 1 # feature
+; DINST-ONLY-NEXT:.byte 32 # feature
; BBF-ONLY-NEXT:.byte 2 # feature
; BRP-ONLY-NEXT:.byte 4 # feature
; CHECK-NEXT: .quad .Lfunc_begin0 # function address
@@ -138,6 +140,8 @@ declare i32 @__gxx_personality_v0(...)
; PGO-BRP-NEXT: .byte 5 # successor BB ID
; PGO-BRP-NEXT: .ascii "\200\200\200\200\b" # successor branch probability
+; PGO-DINST: .ascii "\341\f" # Dynamic instruction count
+
; SKIP-BB-ENTRIES: .byte 17 # feature
; SKIP-BB-ENTRIES-NEXT: .quad .Lfunc_begin0 # function address
; SKIP-BB-ENTRIES-NEXT: .byte 6 # number of basic blocks
diff --git a/llvm/test/tools/llvm-objdump/X86/elf-pgoanalysismap.yaml b/llvm/test/tools/llvm-objdump/X86/elf-pgoanalysismap.yaml
index 4d1e5408d86d4d..26466c8ff48ffa 100644
--- a/llvm/test/tools/llvm-objdump/X86/elf-pgoanalysismap.yaml
+++ b/llvm/test/tools/llvm-objdump/X86/elf-pgoanalysismap.yaml
@@ -43,9 +43,49 @@ Symbols:
# ENTRYCOUNT: <foo>:
# ENTRYCOUNT: <BB3> (Entry count: 1000):
+## Check the case where we only have dynamic instruction count.
+# RUN: yaml2obj --docnum=2 %s -o %t1
+# RUN: llvm-objdump %t1 -d --symbolize-operands --no-show-raw-insn --no-leading-addr | \
+# RUN: FileCheck %s --check-prefix=DYNICNT
+
+--- !ELF
+FileHeader:
+ Class: ELFCLASS64
+ Data: ELFDATA2LSB
+ Type: ET_EXEC
+ Machine: EM_X86_64
+Sections:
+ - Name: .text.foo
+ Type: SHT_PROGBITS
+ Address: 0x0
+ Flags: [SHF_ALLOC, SHF_EXECINSTR]
+ Content: '50'
+ - Name: .llvm_bb_addr_map.foo
+ Type: SHT_LLVM_BB_ADDR_MAP
+ Link: .text.foo
+ Entries:
+ - Version: 2
+ Feature: 0x20
+ BBRanges:
+ - BaseAddress: 0x0
+ BBEntries:
+ - ID: 3
+ AddressOffset: 0x0
+ Size: 0x1
+ Metadata: 0x1
+ PGOAnalyses:
+ - DynamicInstCount: 1000
+Symbols:
+ - Name: foo
+ Section: .text.foo
+ Value: 0x0
+
+# DYNICNT: <foo>:
+# DYNICNT: <BB3> (Dynamic instruction count: 1000):
+
## Check the case where we have entry points and block frequency information
-# RUN: yaml2obj %s --docnum=2 -o %t2
+# RUN: yaml2obj %s --docnum=3 -o %t2
# RUN: llvm-objdump %t2 -d --symbolize-operands --no-show-raw-insn --no-leading-addr | \
# RUN: FileCheck --match-full-lines --strict-whitespace %s --check-prefix=ENTRYCOUNT-BLOCKFREQ
# RUN: llvm-objdump %t2 -d --symbolize-operands --pretty-pgo-analysis-map --no-show-raw-insn --no-leading-addr | \
@@ -68,7 +108,7 @@ Sections:
Link: .text.foo
Entries:
- Version: 2
- Feature: 0x3
+ Feature: 0x23
BBRanges:
- BaseAddress: 0x0
BBEntries:
@@ -90,6 +130,7 @@ Sections:
Metadata: 0x2
PGOAnalyses:
- FuncEntryCount: 1000
+ DynamicInstCount: 2000
PGOBBEntries:
- BBFreq: 1000
- BBFreq: 133
@@ -101,13 +142,13 @@ Symbols:
Value: 0x0
# ENTRYCOUNT-BLOCKFREQ:<foo>:
-# ENTRYCOUNT-BLOCKFREQ:<BB3> (Entry count: 1000, Frequency: 1000):
+# ENTRYCOUNT-BLOCKFREQ:<BB3> (Entry count: 1000, Dynamic instruction count: 2000, Frequency: 1000):
# ENTRYCOUNT-BLOCKFREQ:<BB1> (Frequency: 133):
# ENTRYCOUNT-BLOCKFREQ:<BB2> (Frequency: 18):
# ENTRYCOUNT-BLOCKFREQ:<BB5> (Frequency: 1000):
# ENTRYCOUNT-BLOCKFREQ-PRETTY:<foo>:
-# ENTRYCOUNT-BLOCKFREQ-PRETTY:<BB3> (Entry count: 1000, Frequency: 1.0):
+# ENTRYCOUNT-BLOCKFREQ-PRETTY:<BB3> (Entry count: 1000, Dynamic instruction count: 2000, Frequency: 1.0):
# ENTRYCOUNT-BLOCKFREQ-PRETTY:<BB1> (Frequency: 0.133):
# ENTRYCOUNT-BLOCKFREQ-PRETTY:<BB2> (Frequency: 0.018):
# ENTRYCOUNT-BLOCKFREQ-PRETTY:<BB5> (Frequency: 1.0):
@@ -115,7 +156,7 @@ Symbols:
## Check the case where we have entry points, block frequency, and branch
## proabability information.
-# RUN: yaml2obj %s --docnum=3 -o %t3
+# RUN: yaml2obj %s --docnum=4 -o %t3
# RUN: llvm-objdump %t3 -d --symbolize-operands --no-show-raw-insn --no-leading-addr | \
# RUN: FileCheck --match-full-lines --strict-whitespace %s --check-prefix=ENTRY-FREQ-PROB
# RUN: llvm-objdump %t3 -d --symbolize-operands --pretty-pgo-analysis-map --no-show-raw-insn --no-leading-addr | \
diff --git a/llvm/test/tools/llvm-readobj/ELF/bb-addr-map-pgo-analysis-map.test b/llvm/test/tools/llvm-readobj/ELF/bb-addr-map-pgo-analysis-map.test
index 5faafd4d83b2f2..748e0fb3dbeb50 100644
--- a/llvm/test/tools/llvm-readobj/ELF/bb-addr-map-pgo-analysis-map.test
+++ b/llvm/test/tools/llvm-readobj/ELF/bb-addr-map-pgo-analysis-map.test
@@ -15,7 +15,7 @@
## Check that a malformed section can be handled.
# RUN: yaml2obj %s -DBITS=32 -DSIZE=24 -o %t2.o
-# RUN: llvm-readobj %t2.o --bb-addr-map 2>&1 | FileCheck --match-full-lines %s -DOFFSET=0x00000018 -DFILE=%t2.o --check-prefix=TRUNCATED
+# RUN: llvm-readobj %t2.o --bb-addr-map 2>&1 | FileCheck --match-full-lines %s -DOFFSET=0x00000014 -DFILE=%t2.o --check-prefix=TRUNCATED
## Check that missing features can be handled.
# RUN: yaml2obj %s -DBITS=32 -DFEATURE=0x2 -o %t3.o
@@ -55,6 +55,7 @@
# CHECK-NEXT: ]
# CHECK-NEXT: PGO analyses {
# CHECK-NEXT: FuncEntryCount: 100
+# CHECK-NEXT: DynamicInstCount: 100
# CHECK-NEXT: PGO BB entries [
# CHECK-NEXT: {
# RAW-NEXT: Frequency: 100
@@ -98,6 +99,7 @@
# CHECK-NEXT: ]
# CHECK-NEXT: PGO analyses {
# CHECK-NEXT: FuncEntryCount: 8888
+# CHECK-NEXT: DynamicInstCount: 8888
# CHECK-NEXT: PGO BB entries [
# CHECK-NEXT: {
# RAW-NEXT: Frequency: 9000
@@ -173,7 +175,7 @@ Sections:
Link: .text
Entries:
- Version: 2
- Feature: 0x7
+ Feature: 0x27
BBRanges:
- BaseAddress: [[ADDR=0x11111]]
BBEntries:
@@ -186,7 +188,7 @@ Sections:
Size: 0x4
Metadata: 0x15
- Version: 2
- Feature: 0x3
+ Feature: 0x23
BBRanges:
- BaseAddress: 0x22222
BBEntries:
@@ -196,6 +198,7 @@ Sections:
Metadata: 0x8
PGOAnalyses:
- FuncEntryCount: 100
+ DynamicInstCount: 100
PGOBBEntries:
- BBFreq: 100
Successors:
@@ -204,6 +207,7 @@ Sections:
- BBFreq: 100
Successors: []
- FuncEntryCount: 8888
+ DynamicInstCount: 8888
PGOBBEntries:
- BBFreq: 9000
- Name: dummy_section
@@ -237,4 +241,3 @@ Symbols:
Section: .text.bar
Type: STT_FUNC
Value: 0x33333
-
diff --git a/llvm/test/tools/obj2yaml/ELF/bb-addr-map-pgo-analysis-map.yaml b/llvm/test/tools/obj2yaml/ELF/bb-addr-map-pgo-analysis-map.yaml
index 299bf463cf4bc9..37edd968893b3a 100644
--- a/llvm/test/tools/obj2yaml/ELF/bb-addr-map-pgo-analysis-map.yaml
+++ b/llvm/test/tools/obj2yaml/ELF/bb-addr-map-pgo-analysis-map.yaml
@@ -15,7 +15,7 @@
# VALID-NEXT: Type: SHT_LLVM_BB_ADDR_MAP
# VALID-NEXT: Entries:
# VALID-NEXT: - Version: 2
-# VALID-NEXT: Feature: 0x7
+# VALID-NEXT: Feature: 0x27
## The 'BaseAddress' field is omitted when it's zero.
# VALID-NEXT: BBRanges:
# VALID-NEXT: - BBEntries:
@@ -42,6 +42,7 @@
# VALID-NEXT: Metadata: 0xC
# VALID-NEXT: PGOAnalyses:
# VALID-NEXT: - FuncEntryCount: 100
+# VALID-NEXT: DynamicInstCount: 200
# VALID-NEXT: PGOBBEntries:
# VALID-NEXT: - BBFreq: 100
# VALID-NEXT: Successors:
@@ -69,7 +70,7 @@ Sections:
ShSize: [[SIZE=<none>]]
Entries:
- Version: 2
- Feature: 0x7
+ Feature: 0x27
BBRanges:
- BaseAddress: 0x0
BBEntries:
@@ -96,6 +97,7 @@ Sections:
Metadata: 0xC
PGOAnalyses:
- FuncEntryCount: 100
+ DynamicInstCount: 200
PGOBBEntries:
- BBFreq: 100
Successors:
@@ -229,4 +231,3 @@ Sections:
# MISSING-FEC-NEXT: - Name: .llvm_bb_addr_map
# MISSING-FEC-NEXT: Type: SHT_LLV...
[truncated]
|
Can you explicate on this argument a bit more? I don't see how this would save binary size when the assembly is already there. It's pretty trivial to parse the basic block frequency and multiply it by the number of instructions in the block in an external tool. We already have llvm-cm in https://github.com/google/gematria that is able to do this and also apply more fancy cost modelling techniques. If there's interest in that tooling, we can also work on upstreaming (https://discourse.llvm.org/t/rfc-llvm-cm-cost-model-evaluation-for-object-files-machine-code/71502). |
Hi @boomanaiden154 , This is very cool, thank you for the pointer, I will take a deep look later.
Sorry the description wasn't clear. I meant to skip all the BB entries, BB frequency/probability things by using this #114447, so that we only have the dyn-inst-count emitted, this can save ~95% of the section |
It needs some updating (originally used the CSV BB frequency dump that got supersceded by the PGO Analysis map), but shouldn't be too difficult to get working.
Ah, that makes more sense. What's the practical limitation here though? Is there some build-system limitation on your side that you're running into with regard to object/executable sizes? |
There isn't a strict limitation, but in one of my test, there is an 100MB+ size overhead, which is not negligible to user. Then people may come to argue that if it’s possible to keep only what’s necessary(we don't enable |
Ack. It's definitely a significant size overhead, but if there are no hard limits, I think it might be hard to justify using something as specific as a function-level dynamic instruction count as a new member for the BBAddrMap. Especially if the end goal is some level of cost modeling.
It might be able to get a more accurate number. We haven't done any validation of
We used an offline tool because we don't really know yet what works in terms of cost modeling, so putting something directly in the compiler didn't make too much sense for experimentation. The info is definitely available in the compiler, but we found it easier for our workflows to have an offline tool do the modeling based on the information rather than trying to do it in the compiler. We could look at using it in the compilation pipeline eventually, and it wouldn't be too difficult to wire up, but we're quite a ways off from that currently. On the Gematria front, the current push is getting more accurate models. As a side note, we've also been doing experimentation with trace based cost modeling for register allocation and have achieved pretty good results for that. We're looking at starting to open source tooling in the coming weeks for that. |
In our case, users are very sensitive to binary size increase. While there is no explicit limit, the increase in size is deal breaker for many of our users.
You're right that it doesn't make sense to commit some experimental stuff. However our use case is different, and this is not experimental. So our goal here isn't really accurate cost modeling. What we need is just simple dynamic instruct count aggregation -- the goal is to tell users instruction count change purely from any user source change (with very limited optimization), and it's not for evaluating compiler optimization. Think of it as a simple, rough, directional signal for developers, and it could be surfaced through IDE or phabricator. We want to keep it simple so developer can reason about it easily. We don't want the complexity from llvm-cm, and we also can't depend on an out-of-tree google tool. I hope that explains why we need this change. |
It seems that this feature uses the The alternative is to define a new extension and implement the parsing/emitting logic separately. Since your structure is basically a map from function addresses to counts, it shouldn't be too hard to implement, though I agree you would need to add code in different places (ObjectYaml, Object, Codegen). @jh7370 may have an opinion here. |
Sorry, I don't think I really have enough background understanding to be able to give an opinion on this topic. |
Sorry for the late reply(was on a long holiday leave) and thanks for the comments.
Let me clarify further, I hope to explain this feature should have very little impact on future maintenance. The new feature(Dynamic-instruciton-count) should be a general and stable feature. It’s not experimental, and the definition is straightforward ,i,e. the total count of all the instructions within a function(think of linux perf tool, it provides the Instructions event which offers different insights compared to the Cycles event), it’s architecture-independent and should rarely require changes. And the code is small, just needs one field encoding/decoding. Regarding the place to implement, yes, as you said, I feel it’s strongly relevant to PGO feature/pgo-analysis-map. Like the function-entry-count, which also doesn’t need bb addressing mapping, and we will use #114447 to skip BB info to save the size.
So I hope we can extend this to @jh7370 Let me share more context with you. We are working on a new feature(dynamic-instruciton-counts, the total PGO counts for all instructions within a function) and its emission. It will be used as a indicator for developer to tell instruction count changes from source code changes without running the binary. As it's a PGO related feature, we initially planned this to be an extension to the existing |
So I think the question here is whether the new extension, and resultant code changes, outweighs the cost of supporting the format within the BB Addr map code paths everywhere. Although I helped review things for the BB Addr map support, I don't really recall how complex it was and how much of an impact adding a different "mode" to it would have. Another question to answer is if one or the other modes/formats needs to be extended in some way, will it require the behaviour to be duplicated in both formats? If @rlavaee thinks it would be better for a new section type to be implemented, so that the two can evolve independently and/or so that the handling of each section type is simpler, I've got no objection to that being the way forward. |
Thank you for your insights! @jh7370 I guess it would be clearer to make decision if we have the patch with the new extension. I'll try implementing one version, we can compare and decide it later. |
Sorry for the delay in response.
I would say even more general like SHT_LLVM_FUNC_ADDR_MAP or simply SHT_LLVM_FUNC_MAP is appropriate. You don't want to limit it to PGO information. |
Sounds good not to limit to PGO info, I will give a try with that. Thanks! |
@rlavaee Here is a draft patch to use the new extension approach #123804 (will add more test/doc before publishing for review), please take a look. As you can find, It's mostly duplicated from the BB_ADDR_MAP code, but I guess it should be ok for maintenance too and good for other feature extensions. I don't have strong option, will follow your advice on which one to go, I really appreciate it! cc @jh7370 too, in case you have more feedback on this, thank you! |
Add a new
PGOAnalysisMap
feature(-pgo-analysis-map=dyn-inst-count
) to emit estimated dynamic instruction count. The dynamic instruction count is calculated as the sum of all instruction's sample count(extracted from PGO MBFI) within the function, extending as a new feature option is to save binary size so that we can skip parsing from BBFreq/BBProb(using #114447). This feature would serve as a performance proxy for PGO optimized binary, that can be leveraged for code analysis tool, such as detect unexpectedly high instruction counts, pinpoint functions with unusually high dynamic instruction, compare instruction counts across different compilation pipelines, etc.