This is *very* slow. You don't want to enable it if you don't need it. It is also incompatible with the recompiler and will disable it if the option is enabled.
588 lines
16 KiB
C++
588 lines
16 KiB
C++
#include "cpu_code_cache.h"
|
|
#include "bus.h"
|
|
#include "common/assert.h"
|
|
#include "common/log.h"
|
|
#include "cpu_core.h"
|
|
#include "cpu_core_private.h"
|
|
#include "cpu_disasm.h"
|
|
#include "system.h"
|
|
#include "timing_event.h"
|
|
Log_SetChannel(CPU::CodeCache);
|
|
|
|
#ifdef WITH_RECOMPILER
|
|
#include "cpu_recompiler_code_generator.h"
|
|
#endif
|
|
|
|
namespace CPU::CodeCache {
|
|
|
|
constexpr bool USE_BLOCK_LINKING = true;
|
|
|
|
#ifdef WITH_RECOMPILER
|
|
static constexpr u32 RECOMPILER_CODE_CACHE_SIZE = 32 * 1024 * 1024;
|
|
static constexpr u32 RECOMPILER_FAR_CODE_CACHE_SIZE = 32 * 1024 * 1024;
|
|
static constexpr u32 RECOMPILER_GUARD_SIZE = 4096;
|
|
alignas(Recompiler::CODE_STORAGE_ALIGNMENT) static u8
|
|
s_code_storage[RECOMPILER_CODE_CACHE_SIZE + RECOMPILER_FAR_CODE_CACHE_SIZE];
|
|
static JitCodeBuffer s_code_buffer;
|
|
|
|
enum : u32
|
|
{
|
|
FAST_MAP_RAM_SLOT_COUNT = Bus::RAM_SIZE / 4,
|
|
FAST_MAP_BIOS_SLOT_COUNT = Bus::BIOS_SIZE / 4,
|
|
FAST_MAP_TOTAL_SLOT_COUNT = FAST_MAP_RAM_SLOT_COUNT + FAST_MAP_BIOS_SLOT_COUNT,
|
|
};
|
|
|
|
std::array<CodeBlock::HostCodePointer, FAST_MAP_TOTAL_SLOT_COUNT> s_fast_map;
|
|
|
|
ALWAYS_INLINE static u32 GetFastMapIndex(u32 pc)
|
|
{
|
|
return ((pc & PHYSICAL_MEMORY_ADDRESS_MASK) >= Bus::BIOS_BASE) ?
|
|
(FAST_MAP_RAM_SLOT_COUNT + ((pc & Bus::BIOS_MASK) >> 2)) :
|
|
((pc & Bus::RAM_MASK) >> 2);
|
|
}
|
|
|
|
static void FastCompileBlockFunction();
|
|
|
|
static void ResetFastMap()
|
|
{
|
|
s_fast_map.fill(FastCompileBlockFunction);
|
|
}
|
|
|
|
static void SetFastMap(u32 pc, CodeBlock::HostCodePointer function)
|
|
{
|
|
s_fast_map[GetFastMapIndex(pc)] = function;
|
|
}
|
|
|
|
#endif
|
|
|
|
using BlockMap = std::unordered_map<u32, CodeBlock*>;
|
|
|
|
void LogCurrentState();
|
|
|
|
/// Returns the block key for the current execution state.
|
|
static CodeBlockKey GetNextBlockKey();
|
|
|
|
/// Looks up the block in the cache if it's already been compiled.
|
|
static CodeBlock* LookupBlock(CodeBlockKey key);
|
|
|
|
/// Can the current block execute? This will re-validate the block if necessary.
|
|
/// The block can also be flushed if recompilation failed, so ignore the pointer if false is returned.
|
|
static bool RevalidateBlock(CodeBlock* block);
|
|
|
|
static bool CompileBlock(CodeBlock* block);
|
|
static void FlushBlock(CodeBlock* block);
|
|
static void AddBlockToPageMap(CodeBlock* block);
|
|
static void RemoveBlockFromPageMap(CodeBlock* block);
|
|
|
|
/// Link block from to to.
|
|
static void LinkBlock(CodeBlock* from, CodeBlock* to);
|
|
|
|
/// Unlink all blocks which point to this block, and any that this block links to.
|
|
static void UnlinkBlock(CodeBlock* block);
|
|
|
|
static bool s_use_recompiler = false;
|
|
static BlockMap s_blocks;
|
|
static std::array<std::vector<CodeBlock*>, CPU_CODE_CACHE_PAGE_COUNT> m_ram_block_map;
|
|
|
|
void Initialize(bool use_recompiler)
|
|
{
|
|
Assert(s_blocks.empty());
|
|
|
|
#ifdef WITH_RECOMPILER
|
|
s_use_recompiler = use_recompiler;
|
|
if (!s_code_buffer.Initialize(s_code_storage, sizeof(s_code_storage), RECOMPILER_FAR_CODE_CACHE_SIZE,
|
|
RECOMPILER_GUARD_SIZE))
|
|
{
|
|
Panic("Failed to initialize code space");
|
|
}
|
|
|
|
ResetFastMap();
|
|
#else
|
|
s_use_recompiler = false;
|
|
#endif
|
|
}
|
|
|
|
void Shutdown()
|
|
{
|
|
Flush();
|
|
#ifdef WITH_RECOMPILER
|
|
s_code_buffer.Destroy();
|
|
#endif
|
|
}
|
|
|
|
template<PGXPMode pgxp_mode>
|
|
static void ExecuteImpl()
|
|
{
|
|
CodeBlockKey next_block_key;
|
|
|
|
g_state.frame_done = false;
|
|
while (!g_state.frame_done)
|
|
{
|
|
TimingEvents::UpdateCPUDowncount();
|
|
|
|
next_block_key = GetNextBlockKey();
|
|
while (g_state.pending_ticks < g_state.downcount)
|
|
{
|
|
if (HasPendingInterrupt())
|
|
{
|
|
// TODO: Fill in m_next_instruction...
|
|
SafeReadMemoryWord(g_state.regs.pc, &g_state.next_instruction.bits);
|
|
DispatchInterrupt();
|
|
next_block_key = GetNextBlockKey();
|
|
}
|
|
|
|
CodeBlock* block = LookupBlock(next_block_key);
|
|
if (!block)
|
|
{
|
|
Log_WarningPrintf("Falling back to uncached interpreter at 0x%08X", g_state.regs.pc);
|
|
InterpretUncachedBlock();
|
|
continue;
|
|
}
|
|
|
|
reexecute_block:
|
|
|
|
#if 0
|
|
const u32 tick = TimingEvents::GetGlobalTickCounter() + CPU::GetPendingTicks();
|
|
if (tick == 4188233674)
|
|
__debugbreak();
|
|
#endif
|
|
|
|
#if 0
|
|
LogCurrentState();
|
|
#endif
|
|
|
|
if (s_use_recompiler)
|
|
{
|
|
g_state.current_instruction_pc = g_state.regs.pc;
|
|
block->host_code();
|
|
}
|
|
else
|
|
{
|
|
InterpretCachedBlock<pgxp_mode>(*block);
|
|
}
|
|
|
|
if (g_state.pending_ticks >= g_state.downcount)
|
|
break;
|
|
else if (HasPendingInterrupt() || !USE_BLOCK_LINKING)
|
|
continue;
|
|
|
|
next_block_key = GetNextBlockKey();
|
|
if (next_block_key.bits == block->key.bits)
|
|
{
|
|
// we can jump straight to it if there's no pending interrupts
|
|
// ensure it's not a self-modifying block
|
|
if (!block->invalidated || RevalidateBlock(block))
|
|
goto reexecute_block;
|
|
}
|
|
else if (!block->invalidated)
|
|
{
|
|
// Try to find an already-linked block.
|
|
// TODO: Don't need to dereference the block, just store a pointer to the code.
|
|
for (CodeBlock* linked_block : block->link_successors)
|
|
{
|
|
if (linked_block->key.bits == next_block_key.bits)
|
|
{
|
|
if (linked_block->invalidated && !RevalidateBlock(linked_block))
|
|
{
|
|
// CanExecuteBlock can result in a block flush, so stop iterating here.
|
|
break;
|
|
}
|
|
|
|
// Execute the linked block
|
|
block = linked_block;
|
|
goto reexecute_block;
|
|
}
|
|
}
|
|
|
|
// No acceptable blocks found in the successor list, try a new one.
|
|
CodeBlock* next_block = LookupBlock(next_block_key);
|
|
if (next_block)
|
|
{
|
|
// Link the previous block to this new block if we find a new block.
|
|
LinkBlock(block, next_block);
|
|
block = next_block;
|
|
goto reexecute_block;
|
|
}
|
|
}
|
|
}
|
|
|
|
TimingEvents::RunEvents();
|
|
}
|
|
|
|
// in case we switch to interpreter...
|
|
g_state.regs.npc = g_state.regs.pc;
|
|
}
|
|
|
|
void Execute()
|
|
{
|
|
if (g_settings.gpu_pgxp_enable)
|
|
{
|
|
if (g_settings.gpu_pgxp_cpu)
|
|
ExecuteImpl<PGXPMode::CPU>();
|
|
else
|
|
ExecuteImpl<PGXPMode::Memory>();
|
|
}
|
|
else
|
|
{
|
|
ExecuteImpl<PGXPMode::Disabled>();
|
|
}
|
|
}
|
|
|
|
#ifdef WITH_RECOMPILER
|
|
|
|
void ExecuteRecompiler()
|
|
{
|
|
g_state.frame_done = false;
|
|
while (!g_state.frame_done)
|
|
{
|
|
TimingEvents::UpdateCPUDowncount();
|
|
|
|
while (g_state.pending_ticks < g_state.downcount)
|
|
{
|
|
if (HasPendingInterrupt())
|
|
{
|
|
SafeReadMemoryWord(g_state.regs.pc, &g_state.next_instruction.bits);
|
|
DispatchInterrupt();
|
|
}
|
|
|
|
const u32 pc = g_state.regs.pc;
|
|
g_state.current_instruction_pc = pc;
|
|
const u32 fast_map_index = GetFastMapIndex(pc);
|
|
s_fast_map[fast_map_index]();
|
|
}
|
|
|
|
TimingEvents::RunEvents();
|
|
}
|
|
|
|
// in case we switch to interpreter...
|
|
g_state.regs.npc = g_state.regs.pc;
|
|
}
|
|
|
|
#endif
|
|
|
|
void SetUseRecompiler(bool enable)
|
|
{
|
|
#ifdef WITH_RECOMPILER
|
|
if (s_use_recompiler == enable)
|
|
return;
|
|
|
|
s_use_recompiler = enable;
|
|
Flush();
|
|
#endif
|
|
}
|
|
|
|
void Flush()
|
|
{
|
|
Bus::ClearRAMCodePageFlags();
|
|
for (auto& it : m_ram_block_map)
|
|
it.clear();
|
|
|
|
for (const auto& it : s_blocks)
|
|
delete it.second;
|
|
s_blocks.clear();
|
|
#ifdef WITH_RECOMPILER
|
|
s_code_buffer.Reset();
|
|
ResetFastMap();
|
|
#endif
|
|
}
|
|
|
|
void LogCurrentState()
|
|
{
|
|
const auto& regs = g_state.regs;
|
|
WriteToExecutionLog("tick=%u pc=%08X zero=%08X at=%08X v0=%08X v1=%08X a0=%08X a1=%08X a2=%08X a3=%08X t0=%08X "
|
|
"t1=%08X t2=%08X t3=%08X t4=%08X t5=%08X t6=%08X t7=%08X s0=%08X s1=%08X s2=%08X s3=%08X s4=%08X "
|
|
"s5=%08X s6=%08X s7=%08X t8=%08X t9=%08X k0=%08X k1=%08X gp=%08X sp=%08X fp=%08X ra=%08X ldr=%s "
|
|
"ldv=%08X\n",
|
|
TimingEvents::GetGlobalTickCounter() + GetPendingTicks(), regs.pc, regs.zero, regs.at, regs.v0,
|
|
regs.v1, regs.a0, regs.a1, regs.a2, regs.a3, regs.t0, regs.t1, regs.t2, regs.t3, regs.t4, regs.t5,
|
|
regs.t6, regs.t7, regs.s0, regs.s1, regs.s2, regs.s3, regs.s4, regs.s5, regs.s6, regs.s7, regs.t8,
|
|
regs.t9, regs.k0, regs.k1, regs.gp, regs.sp, regs.fp, regs.ra,
|
|
(g_state.next_load_delay_reg == Reg::count) ? "NONE" : GetRegName(g_state.next_load_delay_reg),
|
|
(g_state.next_load_delay_reg == Reg::count) ? 0 : g_state.next_load_delay_value);
|
|
}
|
|
|
|
CodeBlockKey GetNextBlockKey()
|
|
{
|
|
CodeBlockKey key = {};
|
|
key.SetPC(g_state.regs.pc);
|
|
key.user_mode = InUserMode();
|
|
return key;
|
|
}
|
|
|
|
CodeBlock* LookupBlock(CodeBlockKey key)
|
|
{
|
|
BlockMap::iterator iter = s_blocks.find(key.bits);
|
|
if (iter != s_blocks.end())
|
|
{
|
|
// ensure it hasn't been invalidated
|
|
CodeBlock* existing_block = iter->second;
|
|
if (!existing_block || !existing_block->invalidated || RevalidateBlock(existing_block))
|
|
return existing_block;
|
|
}
|
|
|
|
CodeBlock* block = new CodeBlock(key);
|
|
if (CompileBlock(block))
|
|
{
|
|
// add it to the page map if it's in ram
|
|
AddBlockToPageMap(block);
|
|
|
|
#ifdef WITH_RECOMPILER
|
|
SetFastMap(block->GetPC(), block->host_code);
|
|
#endif
|
|
}
|
|
else
|
|
{
|
|
Log_ErrorPrintf("Failed to compile block at PC=0x%08X", key.GetPC());
|
|
delete block;
|
|
block = nullptr;
|
|
}
|
|
|
|
iter = s_blocks.emplace(key.bits, block).first;
|
|
return block;
|
|
}
|
|
|
|
bool RevalidateBlock(CodeBlock* block)
|
|
{
|
|
for (const CodeBlockInstruction& cbi : block->instructions)
|
|
{
|
|
u32 new_code = Bus::ReadCacheableAddress(cbi.pc & PHYSICAL_MEMORY_ADDRESS_MASK);
|
|
if (cbi.instruction.bits != new_code)
|
|
{
|
|
Log_DebugPrintf("Block 0x%08X changed at PC 0x%08X - %08X to %08X - recompiling.", block->GetPC(), cbi.pc,
|
|
cbi.instruction.bits, new_code);
|
|
goto recompile;
|
|
}
|
|
}
|
|
|
|
// re-add it to the page map since it's still up-to-date
|
|
block->invalidated = false;
|
|
AddBlockToPageMap(block);
|
|
#ifdef WITH_RECOMPILER
|
|
SetFastMap(block->GetPC(), block->host_code);
|
|
#endif
|
|
return true;
|
|
|
|
recompile:
|
|
block->instructions.clear();
|
|
if (!CompileBlock(block))
|
|
{
|
|
Log_WarningPrintf("Failed to recompile block 0x%08X - flushing.", block->GetPC());
|
|
FlushBlock(block);
|
|
return false;
|
|
}
|
|
|
|
// re-add to page map again
|
|
if (block->IsInRAM())
|
|
AddBlockToPageMap(block);
|
|
|
|
return true;
|
|
}
|
|
|
|
bool CompileBlock(CodeBlock* block)
|
|
{
|
|
u32 pc = block->GetPC();
|
|
bool is_branch_delay_slot = false;
|
|
bool is_load_delay_slot = false;
|
|
|
|
#if 0
|
|
if (pc == 0x0005aa90)
|
|
__debugbreak();
|
|
#endif
|
|
|
|
for (;;)
|
|
{
|
|
CodeBlockInstruction cbi = {};
|
|
|
|
const PhysicalMemoryAddress phys_addr = pc & PHYSICAL_MEMORY_ADDRESS_MASK;
|
|
if (!Bus::IsCacheableAddress(phys_addr))
|
|
break;
|
|
|
|
cbi.instruction.bits = Bus::ReadCacheableAddress(phys_addr);
|
|
if (!IsInvalidInstruction(cbi.instruction))
|
|
break;
|
|
|
|
cbi.pc = pc;
|
|
cbi.is_branch_delay_slot = is_branch_delay_slot;
|
|
cbi.is_load_delay_slot = is_load_delay_slot;
|
|
cbi.is_branch_instruction = IsBranchInstruction(cbi.instruction);
|
|
cbi.is_load_instruction = IsMemoryLoadInstruction(cbi.instruction);
|
|
cbi.is_store_instruction = IsMemoryStoreInstruction(cbi.instruction);
|
|
cbi.has_load_delay = InstructionHasLoadDelay(cbi.instruction);
|
|
cbi.can_trap = CanInstructionTrap(cbi.instruction, InUserMode());
|
|
|
|
// instruction is decoded now
|
|
block->instructions.push_back(cbi);
|
|
pc += sizeof(cbi.instruction.bits);
|
|
|
|
// if we're in a branch delay slot, the block is now done
|
|
// except if this is a branch in a branch delay slot, then we grab the one after that, and so on...
|
|
if (is_branch_delay_slot && !cbi.is_branch_instruction)
|
|
break;
|
|
|
|
// if this is a branch, we grab the next instruction (delay slot), and then exit
|
|
is_branch_delay_slot = cbi.is_branch_instruction;
|
|
|
|
// same for load delay
|
|
is_load_delay_slot = cbi.has_load_delay;
|
|
|
|
// is this a non-branchy exit? (e.g. syscall)
|
|
if (IsExitBlockInstruction(cbi.instruction))
|
|
break;
|
|
}
|
|
|
|
if (!block->instructions.empty())
|
|
{
|
|
block->instructions.back().is_last_instruction = true;
|
|
|
|
#ifdef _DEBUG
|
|
SmallString disasm;
|
|
Log_DebugPrintf("Block at 0x%08X", block->GetPC());
|
|
for (const CodeBlockInstruction& cbi : block->instructions)
|
|
{
|
|
CPU::DisassembleInstruction(&disasm, cbi.pc, cbi.instruction.bits, nullptr);
|
|
Log_DebugPrintf("[%s %s 0x%08X] %08X %s", cbi.is_branch_delay_slot ? "BD" : " ",
|
|
cbi.is_load_delay_slot ? "LD" : " ", cbi.pc, cbi.instruction.bits, disasm.GetCharArray());
|
|
}
|
|
#endif
|
|
}
|
|
else
|
|
{
|
|
Log_WarningPrintf("Empty block compiled at 0x%08X", block->key.GetPC());
|
|
return false;
|
|
}
|
|
|
|
#ifdef WITH_RECOMPILER
|
|
if (s_use_recompiler)
|
|
{
|
|
// Ensure we're not going to run out of space while compiling this block.
|
|
if (s_code_buffer.GetFreeCodeSpace() <
|
|
(block->instructions.size() * Recompiler::MAX_NEAR_HOST_BYTES_PER_INSTRUCTION) ||
|
|
s_code_buffer.GetFreeFarCodeSpace() <
|
|
(block->instructions.size() * Recompiler::MAX_FAR_HOST_BYTES_PER_INSTRUCTION))
|
|
{
|
|
Log_WarningPrintf("Out of code space, flushing all blocks.");
|
|
Flush();
|
|
}
|
|
|
|
Recompiler::CodeGenerator codegen(&s_code_buffer);
|
|
if (!codegen.CompileBlock(block, &block->host_code, &block->host_code_size))
|
|
{
|
|
Log_ErrorPrintf("Failed to compile host code for block at 0x%08X", block->key.GetPC());
|
|
return false;
|
|
}
|
|
}
|
|
#endif
|
|
|
|
return true;
|
|
}
|
|
|
|
#ifdef WITH_RECOMPILER
|
|
|
|
void FastCompileBlockFunction()
|
|
{
|
|
CodeBlock* block = LookupBlock(GetNextBlockKey());
|
|
if (block)
|
|
block->host_code();
|
|
else
|
|
InterpretUncachedBlock();
|
|
}
|
|
|
|
#endif
|
|
|
|
void InvalidateBlocksWithPageIndex(u32 page_index)
|
|
{
|
|
DebugAssert(page_index < CPU_CODE_CACHE_PAGE_COUNT);
|
|
auto& blocks = m_ram_block_map[page_index];
|
|
for (CodeBlock* block : blocks)
|
|
{
|
|
// Invalidate forces the block to be checked again.
|
|
Log_DebugPrintf("Invalidating block at 0x%08X", block->GetPC());
|
|
block->invalidated = true;
|
|
#ifdef WITH_RECOMPILER
|
|
SetFastMap(block->GetPC(), FastCompileBlockFunction);
|
|
#endif
|
|
}
|
|
|
|
// Block will be re-added next execution.
|
|
blocks.clear();
|
|
Bus::ClearRAMCodePage(page_index);
|
|
}
|
|
|
|
void FlushBlock(CodeBlock* block)
|
|
{
|
|
BlockMap::iterator iter = s_blocks.find(block->key.GetPC());
|
|
Assert(iter != s_blocks.end() && iter->second == block);
|
|
Log_DevPrintf("Flushing block at address 0x%08X", block->GetPC());
|
|
|
|
#ifdef WITH_RECOMPILER
|
|
SetFastMap(block->GetPC(), FastCompileBlockFunction);
|
|
#endif
|
|
|
|
// if it's been invalidated it won't be in the page map
|
|
if (block->invalidated)
|
|
RemoveBlockFromPageMap(block);
|
|
|
|
UnlinkBlock(block);
|
|
|
|
s_blocks.erase(iter);
|
|
delete block;
|
|
}
|
|
|
|
void AddBlockToPageMap(CodeBlock* block)
|
|
{
|
|
if (!block->IsInRAM())
|
|
return;
|
|
|
|
const u32 start_page = block->GetStartPageIndex();
|
|
const u32 end_page = block->GetEndPageIndex();
|
|
for (u32 page = start_page; page <= end_page; page++)
|
|
{
|
|
m_ram_block_map[page].push_back(block);
|
|
Bus::SetRAMCodePage(page);
|
|
}
|
|
}
|
|
|
|
void RemoveBlockFromPageMap(CodeBlock* block)
|
|
{
|
|
if (!block->IsInRAM())
|
|
return;
|
|
|
|
const u32 start_page = block->GetStartPageIndex();
|
|
const u32 end_page = block->GetEndPageIndex();
|
|
for (u32 page = start_page; page <= end_page; page++)
|
|
{
|
|
auto& page_blocks = m_ram_block_map[page];
|
|
auto page_block_iter = std::find(page_blocks.begin(), page_blocks.end(), block);
|
|
Assert(page_block_iter != page_blocks.end());
|
|
page_blocks.erase(page_block_iter);
|
|
}
|
|
}
|
|
|
|
void LinkBlock(CodeBlock* from, CodeBlock* to)
|
|
{
|
|
Log_DebugPrintf("Linking block %p(%08x) to %p(%08x)", from, from->GetPC(), to, to->GetPC());
|
|
from->link_successors.push_back(to);
|
|
to->link_predecessors.push_back(from);
|
|
}
|
|
|
|
void UnlinkBlock(CodeBlock* block)
|
|
{
|
|
for (CodeBlock* predecessor : block->link_predecessors)
|
|
{
|
|
auto iter = std::find(predecessor->link_successors.begin(), predecessor->link_successors.end(), block);
|
|
Assert(iter != predecessor->link_successors.end());
|
|
predecessor->link_successors.erase(iter);
|
|
}
|
|
block->link_predecessors.clear();
|
|
|
|
for (CodeBlock* successor : block->link_successors)
|
|
{
|
|
auto iter = std::find(successor->link_predecessors.begin(), successor->link_predecessors.end(), block);
|
|
Assert(iter != successor->link_predecessors.end());
|
|
successor->link_predecessors.erase(iter);
|
|
}
|
|
block->link_successors.clear();
|
|
}
|
|
|
|
} // namespace CPU::CodeCache
|