Skip to content

Commit 36cf2d4

Browse files
committed
Greatly simplify instruction decoding
The opcode map was complex and fairly inefficient. Using a simple linear search is easier to understand and actually works faster in practice. Heuristically speeding up the search by occasionally re-sorting the opcode cache by match frequency helps even further.
1 parent 88edb8b commit 36cf2d4

File tree

2 files changed

+57
-69
lines changed

2 files changed

+57
-69
lines changed

riscv/processor.cc

Lines changed: 48 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -662,31 +662,38 @@ reg_t processor_t::throw_instruction_address_misaligned(reg_t pc)
662662

663663
insn_func_t processor_t::decode_insn(insn_t insn)
664664
{
665-
if (!extension_enabled(EXT_ZCA) && insn_length(insn.bits()) % 4)
666-
return &::illegal_instruction;
665+
bool rve = extension_enabled('E');
667666

668-
// look up opcode in hash table
669-
size_t idx = insn.bits() % OPCODE_CACHE_SIZE;
670-
auto [hit, desc] = opcode_cache[idx].lookup(insn.bits());
667+
if (unlikely(!extension_enabled(EXT_ZCA) && insn_length(insn.bits()) % 4))
668+
return &::illegal_instruction;
671669

672-
bool rve = extension_enabled('E');
670+
auto& oc = opcode_cache[insn.bits() % std::size(opcode_cache)];
671+
if (likely(oc[0].first->matches(insn.bits())))
672+
return oc[0].first->func(xlen, rve, log_commits_enabled);
673+
674+
// linearly search the opcode cache
675+
for (size_t i = 1; i < oc.size(); i++) {
676+
auto desc = oc[i].first;
677+
if (desc->matches(insn.bits())) {
678+
oc[i].second++;
679+
680+
// periodically sort the opcode cache in descending order of frequency
681+
if (unlikely(--opcode_cache_sort_count == 0)) {
682+
opcode_cache_sort_count = OPCODE_CACHE_SORT_COUNT;
683+
684+
for (auto& oc : opcode_cache) {
685+
std::stable_sort(oc.begin(), oc.end(), [](auto a, auto b) {
686+
if (a.first->overlap || b.first->overlap)
687+
return false;
688+
return a.second > b.second;
689+
});
690+
}
691+
}
673692

674-
if (unlikely(!hit)) {
675-
// fall back to linear search
676-
auto matching = [insn_bits = insn.bits()](const insn_desc_t &d) {
677-
return (insn_bits & d.mask) == d.match;
678-
};
679-
auto p = std::find_if(custom_instructions.begin(),
680-
custom_instructions.end(), matching);
681-
if (p == custom_instructions.end()) {
682-
p = std::find_if(instructions.begin(), instructions.end(), matching);
683-
assert(p != instructions.end());
693+
return desc->func(xlen, rve, log_commits_enabled);
684694
}
685-
desc = &*p;
686-
opcode_cache[idx].replace(insn.bits(), desc);
687695
}
688-
689-
return desc->func(xlen, rve, log_commits_enabled);
696+
return &::illegal_instruction;
690697
}
691698

692699
void processor_t::register_insn(insn_desc_t desc, bool is_custom) {
@@ -701,8 +708,25 @@ void processor_t::register_insn(insn_desc_t desc, bool is_custom) {
701708

702709
void processor_t::build_opcode_map()
703710
{
704-
for (size_t i = 0; i < OPCODE_CACHE_SIZE; i++)
705-
opcode_cache[i].reset();
711+
const insn_bits_t N = std::size(opcode_cache);
712+
713+
auto insert = [&](auto& insn) {
714+
auto stride = std::min(N, insn_bits_t(1) << ctz(~insn.mask));
715+
716+
for (size_t i = insn.match & (stride - 1); i < N; i += stride)
717+
if ((insn.match % N) == (i & insn.mask))
718+
opcode_cache[i].push_back({&insn, 0});
719+
};
720+
721+
for (size_t i = 0; i < N; i++)
722+
opcode_cache[i].clear();
723+
724+
std::for_each(custom_instructions.begin(), custom_instructions.end(), insert);
725+
std::for_each(instructions.begin(), instructions.end(), insert);
726+
727+
for (size_t i = 0; i < N; i++)
728+
if (opcode_cache[i].empty())
729+
opcode_cache[i].push_back({&insn_desc_t::illegal_instruction, 0});
706730
}
707731

708732
void processor_t::register_extension(extension_t *x) {
@@ -752,7 +776,8 @@ void processor_t::register_base_instructions()
752776
logged_rv32i_##name, \
753777
logged_rv64i_##name, \
754778
logged_rv32e_##name, \
755-
logged_rv64e_##name \
779+
logged_rv64e_##name, \
780+
name##_overlapping \
756781
}; \
757782
register_base_insn(insn); \
758783
}
@@ -775,9 +800,6 @@ void processor_t::register_base_instructions()
775800
#undef DEFINE_INSN
776801
#undef DEFINE_INSN_UNCOND
777802

778-
// terminate instruction list with a catch-all
779-
register_base_insn(insn_desc_t::illegal_instruction);
780-
781803
build_opcode_map();
782804
}
783805

riscv/processor.h

Lines changed: 9 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,12 @@ struct insn_desc_t
4343
insn_func_t logged_rv64i;
4444
insn_func_t logged_rv32e;
4545
insn_func_t logged_rv64e;
46+
bool overlap = true;
47+
48+
bool matches(insn_bits_t insn) const
49+
{
50+
return (insn & mask) == match;
51+
}
4652

4753
insn_func_t func(int xlen, bool rve, bool logged) const
4854
{
@@ -207,47 +213,6 @@ struct state_t
207213
void csr_init(processor_t* const proc, reg_t max_isa);
208214
};
209215

210-
class opcode_cache_entry_t {
211-
public:
212-
opcode_cache_entry_t()
213-
{
214-
reset();
215-
}
216-
217-
void reset()
218-
{
219-
for (size_t i = 0; i < associativity; i++) {
220-
tag[i] = 0;
221-
contents[i] = &insn_desc_t::illegal_instruction;
222-
}
223-
}
224-
225-
void replace(insn_bits_t opcode, const insn_desc_t* desc)
226-
{
227-
for (size_t i = associativity - 1; i > 0; i--) {
228-
tag[i] = tag[i-1];
229-
contents[i] = contents[i-1];
230-
}
231-
232-
tag[0] = opcode;
233-
contents[0] = desc;
234-
}
235-
236-
std::tuple<bool, const insn_desc_t*> lookup(insn_bits_t opcode)
237-
{
238-
for (size_t i = 0; i < associativity; i++)
239-
if (tag[i] == opcode)
240-
return std::tuple(true, contents[i]);
241-
242-
return std::tuple(false, nullptr);
243-
}
244-
245-
private:
246-
static const size_t associativity = 4;
247-
insn_bits_t tag[associativity];
248-
const insn_desc_t* contents[associativity];
249-
};
250-
251216
// this class represents one processor in a RISC-V machine.
252217
class processor_t : public abstract_device_t
253218
{
@@ -409,8 +374,9 @@ class processor_t : public abstract_device_t
409374
std::vector<insn_desc_t> custom_instructions;
410375
std::unordered_map<reg_t,uint64_t> pc_histogram;
411376

412-
static const size_t OPCODE_CACHE_SIZE = 4095;
413-
opcode_cache_entry_t opcode_cache[OPCODE_CACHE_SIZE];
377+
static const size_t OPCODE_CACHE_SORT_COUNT = 5000000;
378+
size_t opcode_cache_sort_count = OPCODE_CACHE_SORT_COUNT;
379+
std::vector<std::pair<const insn_desc_t*, reg_t>> opcode_cache[128];
414380

415381
void take_pending_interrupt() { take_interrupt(state.mip->read() & state.mie->read()); }
416382
void take_interrupt(reg_t mask); // take first enabled interrupt in mask

0 commit comments

Comments
 (0)