JIT optimizations and refactoring (#675)
* CPU/Recompiler: Use rel32 call where possible for no-args * JitCodeBuffer: Support using preallocated buffer * CPU/Recompiler/AArch64: Use bl instead of blr for short branches * CPU/CodeCache: Allocate recompiler buffer in program space This means we don't need 64-bit moves for every call out of the recompiler. * GTE: Don't store as u16 and load as u32 * CPU/Recompiler: Add methods to emit global load/stores * GTE: Convert class to namespace * CPU/Recompiler: Call GTE functions directly * Settings: Turn into a global variable * GPU: Replace local pointers with global * InterruptController: Turn into a global pointer * System: Replace local pointers with global * Timers: Turn into a global instance * DMA: Turn into a global instance * SPU: Turn into a global instance * CDROM: Turn into a global instance * MDEC: Turn into a global instance * Pad: Turn into a global instance * SIO: Turn into a global instance * CDROM: Move audio FIFO to the heap * CPU/Recompiler: Drop ASMFunctions No longer needed since we have code in the same 4GB window. * CPUCodeCache: Turn class into namespace * Bus: Local pointer -> global pointers * CPU: Turn class into namespace * Bus: Turn into namespace * GTE: Store registers in CPU state struct Allows relative addressing on ARM. * CPU/Recompiler: Align code storage to page size * CPU/Recompiler: Fix relative branches on A64 * HostInterface: Local references to global * System: Turn into a namespace, move events out * Add guard pages * Android: Fix build
This commit is contained in:
committed by
GitHub
parent
1f9fc6ab74
commit
b6f871d2b9
@ -1,186 +1,105 @@
|
||||
#pragma once
|
||||
#include "common/bitfield.h"
|
||||
#include "cpu_types.h"
|
||||
#include "gte.h"
|
||||
#include "gte_types.h"
|
||||
#include "types.h"
|
||||
#include <array>
|
||||
#include <optional>
|
||||
|
||||
class StateWrapper;
|
||||
|
||||
class Bus;
|
||||
class System;
|
||||
|
||||
namespace CPU {
|
||||
|
||||
class CodeCache;
|
||||
|
||||
namespace Recompiler {
|
||||
class CodeGenerator;
|
||||
class Thunks;
|
||||
} // namespace Recompiler
|
||||
|
||||
class Core
|
||||
enum : VirtualMemoryAddress
|
||||
{
|
||||
public:
|
||||
static constexpr VirtualMemoryAddress RESET_VECTOR = UINT32_C(0xBFC00000);
|
||||
static constexpr PhysicalMemoryAddress DCACHE_LOCATION = UINT32_C(0x1F800000);
|
||||
static constexpr PhysicalMemoryAddress DCACHE_LOCATION_MASK = UINT32_C(0xFFFFFC00);
|
||||
static constexpr PhysicalMemoryAddress DCACHE_OFFSET_MASK = UINT32_C(0x000003FF);
|
||||
static constexpr PhysicalMemoryAddress DCACHE_SIZE = UINT32_C(0x00000400);
|
||||
|
||||
friend CodeCache;
|
||||
friend Recompiler::CodeGenerator;
|
||||
friend Recompiler::Thunks;
|
||||
|
||||
Core();
|
||||
~Core();
|
||||
|
||||
void Initialize(Bus* bus);
|
||||
void Reset();
|
||||
bool DoState(StateWrapper& sw);
|
||||
|
||||
void Execute();
|
||||
|
||||
ALWAYS_INLINE Bus* GetBus() const { return m_bus; }
|
||||
|
||||
ALWAYS_INLINE const Registers& GetRegs() const { return m_regs; }
|
||||
ALWAYS_INLINE Registers& GetRegs() { return m_regs; }
|
||||
|
||||
ALWAYS_INLINE TickCount GetPendingTicks() const { return m_pending_ticks; }
|
||||
ALWAYS_INLINE void ResetPendingTicks() { m_pending_ticks = 0; }
|
||||
ALWAYS_INLINE void AddPendingTicks(TickCount ticks) { m_pending_ticks += ticks; }
|
||||
|
||||
ALWAYS_INLINE TickCount GetDowncount() const { return m_downcount; }
|
||||
ALWAYS_INLINE void SetDowncount(TickCount downcount) { m_downcount = downcount; }
|
||||
|
||||
ALWAYS_INLINE const GTE::Core& GetCop2() const { return m_cop2; }
|
||||
ALWAYS_INLINE GTE::Core& GetCop2() { return m_cop2; }
|
||||
|
||||
// Sets the PC and flushes the pipeline.
|
||||
void SetPC(u32 new_pc);
|
||||
|
||||
// Memory reads variants which do not raise exceptions.
|
||||
bool SafeReadMemoryByte(VirtualMemoryAddress addr, u8* value);
|
||||
bool SafeReadMemoryHalfWord(VirtualMemoryAddress addr, u16* value);
|
||||
bool SafeReadMemoryWord(VirtualMemoryAddress addr, u32* value);
|
||||
bool SafeWriteMemoryByte(VirtualMemoryAddress addr, u8 value);
|
||||
bool SafeWriteMemoryHalfWord(VirtualMemoryAddress addr, u16 value);
|
||||
bool SafeWriteMemoryWord(VirtualMemoryAddress addr, u32 value);
|
||||
|
||||
// External IRQs
|
||||
void SetExternalInterrupt(u8 bit);
|
||||
void ClearExternalInterrupt(u8 bit);
|
||||
|
||||
private:
|
||||
template<MemoryAccessType type, MemoryAccessSize size>
|
||||
TickCount DoMemoryAccess(VirtualMemoryAddress address, u32& value);
|
||||
|
||||
template<MemoryAccessType type, MemoryAccessSize size>
|
||||
bool DoAlignmentCheck(VirtualMemoryAddress address);
|
||||
|
||||
template<MemoryAccessType type, MemoryAccessSize size>
|
||||
void DoScratchpadAccess(PhysicalMemoryAddress address, u32& value);
|
||||
|
||||
bool ReadMemoryByte(VirtualMemoryAddress addr, u8* value);
|
||||
bool ReadMemoryHalfWord(VirtualMemoryAddress addr, u16* value);
|
||||
bool ReadMemoryWord(VirtualMemoryAddress addr, u32* value);
|
||||
bool WriteMemoryByte(VirtualMemoryAddress addr, u8 value);
|
||||
bool WriteMemoryHalfWord(VirtualMemoryAddress addr, u16 value);
|
||||
bool WriteMemoryWord(VirtualMemoryAddress addr, u32 value);
|
||||
|
||||
// state helpers
|
||||
ALWAYS_INLINE bool InUserMode() const { return m_cop0_regs.sr.KUc; }
|
||||
ALWAYS_INLINE bool InKernelMode() const { return !m_cop0_regs.sr.KUc; }
|
||||
|
||||
void DisassembleAndPrint(u32 addr);
|
||||
void DisassembleAndLog(u32 addr);
|
||||
void DisassembleAndPrint(u32 addr, u32 instructions_before, u32 instructions_after);
|
||||
|
||||
// Updates load delays - call after each instruction
|
||||
ALWAYS_INLINE void UpdateLoadDelay()
|
||||
{
|
||||
// the old value is needed in case the delay slot instruction overwrites the same register
|
||||
if (m_load_delay_reg != Reg::count)
|
||||
m_regs.r[static_cast<u8>(m_load_delay_reg)] = m_load_delay_value;
|
||||
|
||||
m_load_delay_reg = m_next_load_delay_reg;
|
||||
m_load_delay_value = m_next_load_delay_value;
|
||||
m_next_load_delay_reg = Reg::count;
|
||||
}
|
||||
|
||||
// Fetches the instruction at m_regs.npc
|
||||
bool FetchInstruction();
|
||||
void ExecuteInstruction();
|
||||
void ExecuteCop0Instruction();
|
||||
void ExecuteCop2Instruction();
|
||||
void Branch(u32 target);
|
||||
|
||||
// exceptions
|
||||
u32 GetExceptionVector(Exception excode) const;
|
||||
void RaiseException(Exception excode);
|
||||
void RaiseException(Exception excode, u32 EPC, bool BD, bool BT, u8 CE);
|
||||
bool HasPendingInterrupt();
|
||||
void DispatchInterrupt();
|
||||
|
||||
// clears pipeline of load/branch delays
|
||||
void FlushPipeline();
|
||||
|
||||
// helper functions for registers which aren't writable
|
||||
u32 ReadReg(Reg rs);
|
||||
void WriteReg(Reg rd, u32 value);
|
||||
|
||||
// helper for generating a load delay write
|
||||
void WriteRegDelayed(Reg rd, u32 value);
|
||||
|
||||
// write to cache control register
|
||||
void WriteCacheControl(u32 value);
|
||||
|
||||
// read/write cop0 regs
|
||||
std::optional<u32> ReadCop0Reg(Cop0Reg reg);
|
||||
void WriteCop0Reg(Cop0Reg reg, u32 value);
|
||||
|
||||
Bus* m_bus = nullptr;
|
||||
|
||||
// ticks the CPU has executed
|
||||
TickCount m_pending_ticks = 0;
|
||||
TickCount m_downcount = MAX_SLICE_SIZE;
|
||||
|
||||
Registers m_regs = {};
|
||||
Cop0Registers m_cop0_regs = {};
|
||||
Instruction m_next_instruction = {};
|
||||
|
||||
// address of the instruction currently being executed
|
||||
Instruction m_current_instruction = {};
|
||||
u32 m_current_instruction_pc = 0;
|
||||
bool m_current_instruction_in_branch_delay_slot = false;
|
||||
bool m_current_instruction_was_branch_taken = false;
|
||||
bool m_next_instruction_is_branch_delay_slot = false;
|
||||
bool m_branch_was_taken = false;
|
||||
bool m_exception_raised = false;
|
||||
bool m_interrupt_delay = false;
|
||||
|
||||
// load delays
|
||||
Reg m_load_delay_reg = Reg::count;
|
||||
u32 m_load_delay_value = 0;
|
||||
Reg m_next_load_delay_reg = Reg::count;
|
||||
u32 m_next_load_delay_value = 0;
|
||||
|
||||
u32 m_cache_control = 0;
|
||||
System* m_system = nullptr;
|
||||
|
||||
// data cache (used as scratchpad)
|
||||
std::array<u8, DCACHE_SIZE> m_dcache = {};
|
||||
|
||||
GTE::Core m_cop2;
|
||||
RESET_VECTOR = UINT32_C(0xBFC00000)
|
||||
};
|
||||
enum : PhysicalMemoryAddress
|
||||
{
|
||||
DCACHE_LOCATION = UINT32_C(0x1F800000),
|
||||
DCACHE_LOCATION_MASK = UINT32_C(0xFFFFFC00),
|
||||
DCACHE_OFFSET_MASK = UINT32_C(0x000003FF),
|
||||
DCACHE_SIZE = UINT32_C(0x00000400)
|
||||
};
|
||||
|
||||
extern bool TRACE_EXECUTION;
|
||||
extern bool LOG_EXECUTION;
|
||||
struct State
|
||||
{
|
||||
// ticks the CPU has executed
|
||||
TickCount pending_ticks = 0;
|
||||
TickCount downcount = MAX_SLICE_SIZE;
|
||||
|
||||
Registers regs = {};
|
||||
Cop0Registers cop0_regs = {};
|
||||
Instruction next_instruction = {};
|
||||
|
||||
// address of the instruction currently being executed
|
||||
Instruction current_instruction = {};
|
||||
u32 current_instruction_pc = 0;
|
||||
bool current_instruction_in_branch_delay_slot = false;
|
||||
bool current_instruction_was_branch_taken = false;
|
||||
bool next_instruction_is_branch_delay_slot = false;
|
||||
bool branch_was_taken = false;
|
||||
bool exception_raised = false;
|
||||
bool interrupt_delay = false;
|
||||
bool frame_done = false;
|
||||
|
||||
// load delays
|
||||
Reg load_delay_reg = Reg::count;
|
||||
u32 load_delay_value = 0;
|
||||
Reg next_load_delay_reg = Reg::count;
|
||||
u32 next_load_delay_value = 0;
|
||||
|
||||
u32 cache_control = 0;
|
||||
|
||||
// GTE registers are stored here so we can access them on ARM with a single instruction
|
||||
GTE::Regs gte_regs = {};
|
||||
|
||||
// data cache (used as scratchpad)
|
||||
std::array<u8, DCACHE_SIZE> dcache = {};
|
||||
};
|
||||
|
||||
extern State g_state;
|
||||
|
||||
void Initialize();
|
||||
void Shutdown();
|
||||
void Reset();
|
||||
bool DoState(StateWrapper& sw);
|
||||
|
||||
/// Executes interpreter loop.
|
||||
void Execute();
|
||||
|
||||
ALWAYS_INLINE Registers& GetRegs() { return g_state.regs; }
|
||||
|
||||
ALWAYS_INLINE TickCount GetPendingTicks() { return g_state.pending_ticks; }
|
||||
ALWAYS_INLINE void ResetPendingTicks() { g_state.pending_ticks = 0; }
|
||||
ALWAYS_INLINE void AddPendingTicks(TickCount ticks) { g_state.pending_ticks += ticks; }
|
||||
|
||||
// state helpers
|
||||
ALWAYS_INLINE bool InUserMode() { return g_state.cop0_regs.sr.KUc; }
|
||||
ALWAYS_INLINE bool InKernelMode() { return !g_state.cop0_regs.sr.KUc; }
|
||||
|
||||
// Memory reads variants which do not raise exceptions.
|
||||
bool SafeReadMemoryByte(VirtualMemoryAddress addr, u8* value);
|
||||
bool SafeReadMemoryHalfWord(VirtualMemoryAddress addr, u16* value);
|
||||
bool SafeReadMemoryWord(VirtualMemoryAddress addr, u32* value);
|
||||
bool SafeWriteMemoryByte(VirtualMemoryAddress addr, u8 value);
|
||||
bool SafeWriteMemoryHalfWord(VirtualMemoryAddress addr, u16 value);
|
||||
bool SafeWriteMemoryWord(VirtualMemoryAddress addr, u32 value);
|
||||
|
||||
// External IRQs
|
||||
void SetExternalInterrupt(u8 bit);
|
||||
void ClearExternalInterrupt(u8 bit);
|
||||
bool HasPendingInterrupt();
|
||||
void DispatchInterrupt();
|
||||
|
||||
void DisassembleAndPrint(u32 addr);
|
||||
void DisassembleAndLog(u32 addr);
|
||||
void DisassembleAndPrint(u32 addr, u32 instructions_before, u32 instructions_after);
|
||||
|
||||
// Write to CPU execution log file.
|
||||
void WriteToExecutionLog(const char* format, ...);
|
||||
|
||||
} // namespace CPU
|
||||
extern bool TRACE_EXECUTION;
|
||||
extern bool LOG_EXECUTION;
|
||||
|
||||
#include "cpu_core.inl"
|
||||
} // namespace CPU
|
||||
|
||||
Reference in New Issue
Block a user