Skip to content

Commit cf89048

Browse files
committed
PPU LLVM: Fix optimization of branch string
1 parent 52495c1 commit cf89048

File tree

2 files changed

+40
-17
lines changed

2 files changed

+40
-17
lines changed

rpcs3/Emu/Cell/PPUTranslator.cpp

+38-15
Original file line numberDiff line numberDiff line change
@@ -266,7 +266,7 @@ Function* PPUTranslator::Translate(const ppu_function& info)
266266
if (!m_ir->GetInsertBlock()->getTerminator())
267267
{
268268
FlushRegisters();
269-
CallFunction(m_addr);
269+
CallFunction(m_addr, nullptr, m_ir->GetInsertBlock());
270270
}
271271
}
272272

@@ -354,7 +354,7 @@ Value* PPUTranslator::RotateLeft(Value* arg, Value* n)
354354
return m_ir->CreateOr(m_ir->CreateShl(arg, m_ir->CreateAnd(n, mask)), m_ir->CreateLShr(arg, m_ir->CreateAnd(m_ir->CreateNeg(n), mask)));
355355
}
356356

357-
void PPUTranslator::CallFunction(u64 target, Value* indirect)
357+
void PPUTranslator::CallFunction(u64 target, Value* indirect, BasicBlock* prev_block)
358358
{
359359
const auto type = m_function->getFunctionType();
360360
const auto block = m_ir->GetInsertBlock();
@@ -372,21 +372,27 @@ void PPUTranslator::CallFunction(u64 target, Value* indirect)
372372

373373
if (_target >= caddr && _target <= cend)
374374
{
375-
std::unordered_set<u64> passed_targets{_target};
375+
std::unordered_set<u64> passed_targets;
376376

377377
u32 target_last = _target;
378378

379379
// Try to follow unconditional branches as long as there is no infinite loop
380-
while (target_last != _target)
380+
while (target_last != m_addr + base)
381381
{
382+
if (passed_targets.empty())
383+
{
384+
passed_targets.emplace(_target);
385+
passed_targets.emplace(m_addr + base);
386+
}
387+
382388
const ppu_opcode_t op{*ensure(m_info.get_ptr<u32>(target_last))};
383389
const ppu_itype::type itype = g_ppu_itype.decode(op.opcode);
384390

385391
if (((itype == ppu_itype::BC && (op.bo & 0x14) == 0x14) || itype == ppu_itype::B) && !op.lk)
386392
{
387393
const u32 new_target = (op.aa ? 0 : target_last) + (itype == ppu_itype::B ? +op.bt24 : +op.bt14);
388394

389-
if (target_last >= caddr && target_last <= cend)
395+
if (new_target >= caddr && new_target <= cend)
390396
{
391397
if (passed_targets.emplace(new_target).second)
392398
{
@@ -401,11 +407,21 @@ void PPUTranslator::CallFunction(u64 target, Value* indirect)
401407

402408
// Odd destination
403409
}
404-
else if (itype == ppu_itype::BCLR && (op.bo & 0x14) == 0x14 && !op.lk)
410+
else if (itype == ppu_itype::BCLR && (op.bo & 0x14) == 0x14 && !op.lk && (prev_block || m_lr))
405411
{
406412
// Special case: empty function
407413
// In this case the branch can be treated as BCLR because the previous CIA does not matter
408-
indirect = RegLoad(m_lr);
414+
indirect = m_lr;
415+
416+
if (!indirect)
417+
{
418+
// Emit register load at the beginning of the common block
419+
m_ir->SetInsertPoint(prev_block, prev_block->getFirstInsertionPt());
420+
indirect = RegLoad(m_lr);
421+
422+
// Restore current insert point
423+
m_ir->SetInsertPoint(block);
424+
}
409425
}
410426

411427
break;
@@ -629,7 +645,7 @@ Value* PPUTranslator::Trunc(Value* value, Type* type)
629645
return type != value->getType() ? m_ir->CreateTrunc(value, type) : value;
630646
}
631647

632-
void PPUTranslator::UseCondition(MDNode* hint, Value* cond)
648+
void PPUTranslator::UseCondition(MDNode* hint, Value* cond, BasicBlock* prev_block)
633649
{
634650
FlushRegisters();
635651

@@ -639,7 +655,7 @@ void PPUTranslator::UseCondition(MDNode* hint, Value* cond)
639655
const auto next = BasicBlock::Create(m_context, "__next", m_function);
640656
m_ir->CreateCondBr(cond, local, next, hint);
641657
m_ir->SetInsertPoint(next);
642-
CallFunction(m_addr + 4);
658+
CallFunction(m_addr + 4, nullptr, prev_block);
643659
m_ir->SetInsertPoint(local);
644660
}
645661
}
@@ -2024,19 +2040,22 @@ void PPUTranslator::BC(ppu_opcode_t op)
20242040
const s32 bt14 = op.bt14; // Workaround for VS 16.5
20252041
const u64 target = (op.aa ? 0 : m_addr) + bt14;
20262042

2043+
const auto block = m_ir->GetInsertBlock();
2044+
20272045
if (op.aa && m_reloc)
20282046
{
20292047
CompilationError("Branch with absolute address");
20302048
}
20312049

20322050
if (op.lk)
20332051
{
2034-
m_ir->CreateStore(GetAddr(+4), m_ir->CreateStructGEP(m_thread_type, m_thread, static_cast<uint>(&m_lr - m_locals)));
2052+
m_lr = GetAddr(+4);
2053+
m_ir->CreateStore(m_lr, m_ir->CreateStructGEP(m_thread_type, m_thread, static_cast<uint>(&m_lr - m_locals)));
20352054
}
20362055

2037-
UseCondition(CheckBranchProbability(op.bo), CheckBranchCondition(op.bo, op.bi));
2056+
UseCondition(CheckBranchProbability(op.bo), CheckBranchCondition(op.bo, op.bi), block);
20382057

2039-
CallFunction(target);
2058+
CallFunction(target, nullptr, block);
20402059
}
20412060

20422061
void PPUTranslator::SC(ppu_opcode_t op)
@@ -2074,6 +2093,8 @@ void PPUTranslator::B(ppu_opcode_t op)
20742093
const s32 bt24 = op.bt24; // Workaround for VS 16.5
20752094
const u64 target = (op.aa ? 0 : m_addr) + bt24;
20762095

2096+
const auto block = m_ir->GetInsertBlock();
2097+
20772098
if (op.aa && m_reloc)
20782099
{
20792100
CompilationError("Branch with absolute address");
@@ -2085,7 +2106,7 @@ void PPUTranslator::B(ppu_opcode_t op)
20852106
}
20862107

20872108
FlushRegisters();
2088-
CallFunction(target);
2109+
CallFunction(target, nullptr, block);
20892110
}
20902111

20912112
void PPUTranslator::MCRF(ppu_opcode_t op)
@@ -2103,7 +2124,8 @@ void PPUTranslator::BCLR(ppu_opcode_t op)
21032124

21042125
if (op.lk)
21052126
{
2106-
m_ir->CreateStore(GetAddr(+4), m_ir->CreateStructGEP(m_thread_type, m_thread, static_cast<uint>(&m_lr - m_locals)));
2127+
m_lr = GetAddr(+4);
2128+
m_ir->CreateStore(m_lr, m_ir->CreateStructGEP(m_thread_type, m_thread, static_cast<uint>(&m_lr - m_locals)));
21072129
}
21082130

21092131
UseCondition(CheckBranchProbability(op.bo), CheckBranchCondition(op.bo, op.bi));
@@ -2166,7 +2188,8 @@ void PPUTranslator::BCCTR(ppu_opcode_t op)
21662188

21672189
if (op.lk)
21682190
{
2169-
m_ir->CreateStore(GetAddr(+4), m_ir->CreateStructGEP(m_thread_type, m_thread, static_cast<uint>(&m_lr - m_locals)));
2191+
m_lr = GetAddr(+4);
2192+
m_ir->CreateStore(m_lr, m_ir->CreateStructGEP(m_thread_type, m_thread, static_cast<uint>(&m_lr - m_locals)));
21702193
}
21712194

21722195
UseCondition(CheckBranchProbability(op.bo | 0x4), CheckBranchCondition(op.bo | 0x4, op.bi));

rpcs3/Emu/Cell/PPUTranslator.h

+2-2
Original file line numberDiff line numberDiff line change
@@ -148,7 +148,7 @@ class PPUTranslator final : public cpu_translator
148148
llvm::Value* RotateLeft(llvm::Value* arg, llvm::Value* n);
149149

150150
// Emit function call
151-
void CallFunction(u64 target, llvm::Value* indirect = nullptr);
151+
void CallFunction(u64 target, llvm::Value* indirect = nullptr, llvm::BasicBlock* prev_block = nullptr);
152152

153153
// Initialize global for writing
154154
llvm::Value* RegInit(llvm::Value*& local);
@@ -292,7 +292,7 @@ class PPUTranslator final : public cpu_translator
292292
llvm::MDNode* CheckBranchProbability(u32 bo);
293293

294294
// Branch to next instruction if condition failed, never branch on nullptr
295-
void UseCondition(llvm::MDNode* hint, llvm::Value* = nullptr);
295+
void UseCondition(llvm::MDNode* hint, llvm::Value* = nullptr, llvm::BasicBlock* prev_block = nullptr);
296296

297297
// Get memory pointer
298298
llvm::Value* GetMemory(llvm::Value* addr);

0 commit comments

Comments
 (0)