Skip to content

Commit b1c7f88

Browse files
committed
PPU LLVM: Recycle identical functions
1 parent 1643fad commit b1c7f88

File tree

5 files changed

+214
-10
lines changed

5 files changed

+214
-10
lines changed

rpcs3/Emu/Cell/PPUAnalyser.cpp

+134
Original file line number | Diff line number | Diff line change
@@ -2046,6 +2046,140 @@ bool ppu_module::analyse(u32 lib_toc, u32 entry, const u32 sec_end, const std::b
20462046
}
20472047

20482048
ppu_log.notice("Block analysis: %zu blocks (%zu enqueued)", funcs.size(), block_queue.size());
2049+
2050+
std::unordered_map<std::string_view, std::pair<u32, u32>> duplicate_data_map;
2051+
duplicate_map.clear();
2052+
2053+
for (auto& func : funcs)
2054+
{
2055+
if (func.size == 0 || func.size > 10000u)
2056+
{
2057+
continue;
2058+
}
2059+
2060+
auto& data = duplicate_data_map[std::string_view{get_ptr<char>(func.addr), func.size}];
2061+
2062+
const usz count = data.first;
2063+
2064+
if (!count)
2065+
{
2066+
data.first++;
2067+
data.second = func.addr;
2068+
continue;
2069+
}
2070+
2071+
if (!data.second)
2072+
{
2073+
continue;
2074+
}
2075+
2076+
if (count == 1)
2077+
{
2078+
const u32 faddr = func.addr;
2079+
const u32 fend = func.addr + func.size;
2080+
2081+
bool fail = false;
2082+
2083+
//for (const auto [addr, size] : func.blocks)
2084+
const u32 addr = func.addr;
2085+
const u32 size = func.size;
2086+
{
2087+
if (size == 0)
2088+
{
2089+
continue;
2090+
}
2091+
2092+
auto i_ptr = ensure(get_ptr<u32>(addr));
2093+
2094+
for (u32 i = addr; i < addr + size; i += 4, i_ptr++)
2095+
{
2096+
const ppu_opcode_t op{*i_ptr};
2097+
const auto itype = s_ppu_itype.decode(op.opcode);
2098+
2099+
if (itype != ppu_itype::BC && itype != ppu_itype::B)
2100+
{
2101+
if (i == fend - 4)
2102+
{
2103+
if (!(itype & ppu_itype::branch) && itype != ppu_itype::SC)
2104+
{
2105+
// Inserts a branch to following code
2106+
fail = true;
2107+
break;
2108+
}
2109+
}
2110+
2111+
continue;
2112+
}
2113+
2114+
const u32 target = (op.aa ? 0 : i) + (itype == ppu_itype::B ? +op.bt24 : +op.bt14);
2115+
2116+
if (target >= fend || target < faddr)
2117+
{
2118+
fail = true;
2119+
break;
2120+
}
2121+
2122+
if (itype == ppu_itype::BC && (op.bo & 0x14) != 0x14)
2123+
{
2124+
if (i == fend - 4)
2125+
{
2126+
// Can branch to next
2127+
fail = true;
2128+
break;
2129+
}
2130+
}
2131+
}
2132+
}
2133+
2134+
if (fail)
2135+
{
2136+
data.first = 1;
2137+
data.second = 0;
2138+
continue;
2139+
}
2140+
}
2141+
2142+
data.first++;
2143+
2144+
// Choose the lowest function as the source
2145+
data.second = std::min<u32>(data.second, func.addr);
2146+
}
2147+
2148+
usz dups_count = 0;
2149+
2150+
for (auto& func : funcs)
2151+
{
2152+
if (func.size == 0 || func.size > 10000u)
2153+
{
2154+
continue;
2155+
}
2156+
2157+
const auto data = ::at32(duplicate_data_map, std::string_view{get_ptr<char>(func.addr), func.size});
2158+
2159+
if (data.first > 1)
2160+
{
2161+
duplicate_map[func.addr] = data.second;
2162+
2163+
for (const auto [addr, size] : func.blocks)
2164+
{
2165+
if (size == 0 || addr >= func.addr + func.size)
2166+
{
2167+
continue;
2168+
}
2169+
2170+
duplicate_map[addr] = data.second + (addr - func.addr);
2171+
}
2172+
2173+
if (func.addr != data.second)
2174+
{
2175+
dups_count++;
2176+
}
2177+
2178+
ppu_log.trace("Found PPU function duplicate: func 0x%x vs 0x%x (%d times) (size=%d)", func.addr, data.second, data.first, func.size);
2179+
}
2180+
}
2181+
2182+
ppu_log.success("Function duplication count: %d/%d (%g%)", dups_count, duplicate_data_map.size(), dups_count * 100.0 / duplicate_data_map.size());
20492183
return true;
20502184
}
20512185

rpcs3/Emu/Cell/PPUAnalyser.h

+2
Original file line number | Diff line number | Diff line change
@@ -95,6 +95,7 @@ struct ppu_module
9595
std::vector<ppu_function> funcs{};
9696
std::deque<std::shared_ptr<void>> allocations;
9797
std::map<u32, u32> addr_to_seg_index;
98+
std::unordered_map<u32, u32> duplicate_map;
9899

99100
// Copy info without functions
100101
void copy_part(const ppu_module& info)
@@ -107,6 +108,7 @@ struct ppu_module
107108
secs = info.secs;
108109
allocations = info.allocations;
109110
addr_to_seg_index = info.addr_to_seg_index;
111+
duplicate_map = info.duplicate_map;
110112
}
111113

112114
bool analyse(u32 lib_toc, u32 entry, u32 end, const std::basic_string<u32>& applied, std::function<bool()> check_aborted = {});

rpcs3/Emu/Cell/PPUThread.cpp

+46 -6
Original file line number | Diff line number | Diff line change
@@ -4146,7 +4146,7 @@ extern void ppu_precompile(std::vector<std::string>& dir_queue, std::vector<ppu_
41464146
}
41474147

41484148
ppu_log.notice("Failed to precompile '%s' (prx: %s, ovl: %s): Attempting tratment as executable file", path, prx_err, ovl_err);
4149-
possible_exec_file_paths.push(path, offset, file_size);
4149+
possible_exec_file_paths.push(file_queue[func_i]);
41504150
inc_fdone = 0;
41514151
}
41524152
});
@@ -4643,8 +4643,15 @@ bool ppu_initialize(const ppu_module& info, bool check_only, u64 file_size)
46434643
// Copy block or function entry
46444644
ppu_function& entry = part.funcs.emplace_back(func);
46454645

4646+
u32 og_func = entry.addr;
4647+
4648+
if (auto it = info.duplicate_map.find(entry.addr); it != info.duplicate_map.end())
4649+
{
4650+
og_func = it->second;
4651+
}
4652+
46464653
// Fixup some information
4647-
entry.name = fmt::format("__0x%x", entry.addr - reloc);
4654+
entry.name = fmt::format("__0x%x", og_func - reloc);
46484655

46494656
if (has_mfvscr && g_cfg.core.ppu_set_sat_bit)
46504657
{
@@ -4808,7 +4815,7 @@ bool ppu_initialize(const ppu_module& info, bool check_only, u64 file_size)
48084815
settings += ppu_settings::accurate_nj_mode, settings -= ppu_settings::fixup_nj_denormals, fmt::throw_exception("NJ Not implemented");
48094816

48104817
// Write version, hash, CPU, settings
4811-
fmt::append(obj_name, "v6-kusa-%s-%s-%s.obj", fmt::base57(output, 16), fmt::base57(settings), jit_compiler::cpu(g_cfg.core.llvm_cpu));
4818+
fmt::append(obj_name, "v7-kusa-%s-%s-%s.obj", fmt::base57(output, 16), fmt::base57(settings), jit_compiler::cpu(g_cfg.core.llvm_cpu));
48124819
}
48134820

48144821
if (cpu ? cpu->state.all_of(cpu_flag::exit) : Emu.IsStopped())
@@ -5037,6 +5044,8 @@ bool ppu_initialize(const ppu_module& info, bool check_only, u64 file_size)
50375044

50385045
bool early_exit = false;
50395046

5047+
std::map<std::string, ppu_intrp_func_t> func_ptr_map;
5048+
50405049
// Get and install function addresses
50415050
for (const auto& func : info.funcs)
50425051
{
@@ -5054,12 +5063,29 @@ bool ppu_initialize(const ppu_module& info, bool check_only, u64 file_size)
50545063
break;
50555064
}
50565065

5057-
const auto name = fmt::format("__0x%x", func.addr - reloc);
5066+
u32 og_func = func.addr;
5067+
5068+
if (auto it = info.duplicate_map.find(func.addr); it != info.duplicate_map.end())
5069+
{
5070+
og_func = it->second;
5071+
}
5072+
5073+
const auto name = fmt::format("__0x%x", og_func - reloc);
5074+
5075+
ppu_intrp_func_t dummy{};
5076+
ppu_intrp_func_t& func_ptr = is_first ? func_ptr_map[name] : dummy;
50585077

50595078
// Try to locate existing function if it is not the first time
5060-
const auto addr = is_first ? ensure(reinterpret_cast<ppu_intrp_func_t>(jit->get(name)))
5061-
: reinterpret_cast<ppu_intrp_func_t>(ensure(jit_mod.funcs[index]));
5079+
const auto addr = is_first ? (func_ptr ? func_ptr : (reinterpret_cast<ppu_intrp_func_t>(jit->get(name))))
5080+
: reinterpret_cast<ppu_intrp_func_t>(jit_mod.funcs[index]);
5081+
5082+
if (!addr)
5083+
{
5084+
ppu_log.fatal("Failed to retrieve symbol address at 0x%x (duplicate=0x%x)", func.addr, info.duplicate_map.contains(func.addr) ? og_func : 0);
5085+
ensure(addr);
5086+
}
50625087

5088+
func_ptr = addr;
50635089
jit_mod.funcs.emplace_back(addr);
50645090

50655091
if (func.size == 4 && !BLR_func && *info.get_ptr<u32>(func.addr) == ppu_instructions::BLR())
@@ -5148,6 +5174,11 @@ static void ppu_initialize2(jit_compiler& jit, const ppu_module& module_part, co
51485174
{
51495175
if (func.size)
51505176
{
5177+
if (auto it = module_part.duplicate_map.find(func.addr); it != module_part.duplicate_map.end() && it->second != it->first)
5178+
{
5179+
continue;
5180+
}
5181+
51515182
const auto f = cast<Function>(_module->getOrInsertFunction(func.name, _func).getCallee());
51525183
f->setCallingConv(CallingConv::GHC);
51535184
f->addParamAttr(1, llvm::Attribute::NoAlias);
@@ -5194,6 +5225,15 @@ static void ppu_initialize2(jit_compiler& jit, const ppu_module& module_part, co
51945225

51955226
if (module_part.funcs[fi].size)
51965227
{
5228+
const u32 faddr = module_part.funcs[fi].addr;
5229+
auto it = module_part.duplicate_map.find(faddr);
5230+
5231+
if (it != module_part.duplicate_map.end() && it->second != faddr)
5232+
{
5233+
ppu_log.trace("LLVM: Function 0x%x was skipped (duplicate)", faddr);
5234+
continue;
5235+
}
5236+
51975237
// Translate
51985238
if (const auto func = translator.Translate(module_part.funcs[fi]))
51995239
{

rpcs3/Emu/Cell/PPUTranslator.cpp

+29 -4
Original file line number | Diff line number | Diff line change
@@ -148,6 +148,7 @@ Function* PPUTranslator::Translate(const ppu_function& info)
148148
const u64 base = m_reloc ? m_reloc->addr : 0;
149149
m_addr = info.addr - base;
150150
m_attr = info.attr;
151+
m_func_base = m_addr;
151152

152153
// Don't emit check in small blocks without terminator
153154
bool need_check = info.size >= 16;
@@ -304,13 +305,29 @@ Value* PPUTranslator::VecHandleResult(Value* val)
304305

305306
Value* PPUTranslator::GetAddr(u64 _add)
306307
{
307-
if (m_reloc)
308+
const auto old_cia = std::exchange(m_cia, nullptr);
309+
310+
const bool is_duplicate = m_info.duplicate_map.contains(m_func_base);
311+
const auto cia_add = is_duplicate ? ZExt(RegLoad(m_cia)) : nullptr;
312+
const u32 inst_diff = is_duplicate ? m_addr - m_func_base : m_addr;
313+
314+
// Restore value
315+
m_cia = old_cia;
316+
317+
Value* addr = nullptr;
318+
319+
if (is_duplicate)
320+
{
321+
// Add to current CIA
322+
return m_ir->CreateAdd(m_ir->getInt64(inst_diff + _add), cia_add);
323+
}
324+
else if (m_reloc)
308325
{
309326
// Load segment address from global variable, compute actual instruction address
310-
return m_ir->CreateAdd(m_ir->getInt64(m_addr + _add), m_seg0);
327+
return m_ir->CreateAdd(m_ir->getInt64(inst_diff + _add), m_seg0);
311328
}
312329

313-
return m_ir->getInt64(m_addr + _add);
330+
return m_ir->getInt64(inst_diff + _add);
314331
}
315332

316333
Type* PPUTranslator::ScaleType(Type* type, s32 pow2)
@@ -419,7 +436,15 @@ void PPUTranslator::CallFunction(u64 target, Value* indirect)
419436

420437
if (!indirect)
421438
{
422-
callee = m_module->getOrInsertFunction(fmt::format("__0x%x", target_last - base), type);
439+
const auto it = m_info.duplicate_map.find(target_last);
440+
const u32 first_func = it == m_info.duplicate_map.end() ? target_last : it->second;
441+
442+
if (base)
443+
{
444+
ensure(first_func >= base && target_last >= base);
445+
}
446+
447+
callee = m_module->getOrInsertFunction(fmt::format("__0x%x", first_func - base), type);
423448
cast<Function>(callee.getCallee())->setCallingConv(CallingConv::GHC);
424449
}
425450
}

rpcs3/Emu/Cell/PPUTranslator.h

+3
Original file line number | Diff line number | Diff line change
@@ -28,6 +28,9 @@ class PPUTranslator final : public cpu_translator
2828
// Current position-independent address
2929
u64 m_addr = 0;
3030

31+
// Function start
32+
u64 m_func_base = 0;
33+
3134
// Function attributes
3235
bs_t<ppu_attr> m_attr{};
3336

0 commit comments

Comments
 (0)