System: Refactor main loop

Reduces JIT exits.
Improves runahead performance.
This commit is contained in:
Stenzek
2023-08-15 23:12:21 +10:00
parent 4ebd34fcb3
commit 5b980dafa5
43 changed files with 1343 additions and 923 deletions

View File

@ -21,6 +21,7 @@
#include "sio.h"
#include "spu.h"
#include "timers.h"
#include "timing_event.h"
#include "util/state_wrapper.h"
#include <cstdio>
#include <tuple>
@ -1418,7 +1419,7 @@ TickCount GetICacheFillTicks(VirtualMemoryAddress address)
void CheckAndUpdateICacheTags(u32 line_count, TickCount uncached_ticks)
{
VirtualMemoryAddress current_pc = g_state.regs.pc & ICACHE_TAG_ADDRESS_MASK;
VirtualMemoryAddress current_pc = g_state.pc & ICACHE_TAG_ADDRESS_MASK;
if (IsCachedAddress(current_pc))
{
TickCount ticks = 0;
@ -1541,10 +1542,20 @@ ALWAYS_INLINE static TickCount DoScratchpadAccess(PhysicalMemoryAddress address,
}
template<MemoryAccessType type, MemoryAccessSize size>
static ALWAYS_INLINE TickCount DoMemoryAccess(VirtualMemoryAddress address, u32& value)
static ALWAYS_INLINE_RELEASE TickCount DoMemoryAccess(VirtualMemoryAddress address, u32& value)
{
using namespace Bus;
#if 0
if (type == MemoryAccessType::Write && address == 0x80113028)
{
if ((TimingEvents::GetGlobalTickCounter() + CPU::g_state.pending_ticks) == 5051485)
__debugbreak();
Log_WarningPrintf("VAL %08X @ %u", value, (TimingEvents::GetGlobalTickCounter() + CPU::g_state.pending_ticks));
}
#endif
switch (address >> 29)
{
case 0x00: // KUSEG 0M-512M
@ -1723,9 +1734,9 @@ static bool DoAlignmentCheck(VirtualMemoryAddress address)
bool FetchInstruction()
{
DebugAssert(Common::IsAlignedPow2(g_state.regs.npc, 4));
DebugAssert(Common::IsAlignedPow2(g_state.npc, 4));
const PhysicalMemoryAddress address = g_state.regs.npc;
const PhysicalMemoryAddress address = g_state.npc;
switch (address >> 29)
{
case 0x00: // KUSEG 0M-512M
@ -1764,16 +1775,16 @@ bool FetchInstruction()
}
}
g_state.regs.pc = g_state.regs.npc;
g_state.regs.npc += sizeof(g_state.next_instruction.bits);
g_state.pc = g_state.npc;
g_state.npc += sizeof(g_state.next_instruction.bits);
return true;
}
bool FetchInstructionForInterpreterFallback()
{
DebugAssert(Common::IsAlignedPow2(g_state.regs.npc, 4));
DebugAssert(Common::IsAlignedPow2(g_state.npc, 4));
const PhysicalMemoryAddress address = g_state.regs.npc;
const PhysicalMemoryAddress address = g_state.npc;
switch (address >> 29)
{
case 0x00: // KUSEG 0M-512M
@ -1801,8 +1812,8 @@ bool FetchInstructionForInterpreterFallback()
}
}
g_state.regs.pc = g_state.regs.npc;
g_state.regs.npc += sizeof(g_state.next_instruction.bits);
g_state.pc = g_state.npc;
g_state.npc += sizeof(g_state.next_instruction.bits);
return true;
}

View File

@ -196,6 +196,8 @@
<Import Project="core.props" />
<ItemDefinitionGroup>
<ClCompile>
<PreprocessorDefinitions>ZYDIS_DISABLE_ENCODER;ZYDIS_DISABLE_AVX512;ZYDIS_DISABLE_KNC;ZYDIS_STATIC_BUILD;ZYCORE_STATIC_BUILD;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<AdditionalIncludeDirectories Condition="'$(Platform)'=='x64'">$(SolutionDir)dep\zydis\include;$(SolutionDir)dep\zydis\dependencies\zycore\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<ObjectFileName>$(IntDir)/%(RelativeDir)/</ObjectFileName>
</ClCompile>
</ItemDefinitionGroup>

View File

@ -8,6 +8,7 @@
#include "cpu_core.h"
#include "cpu_core_private.h"
#include "cpu_disasm.h"
#include "cpu_recompiler_types.h"
#include "settings.h"
#include "system.h"
#include "timing_event.h"
@ -17,6 +18,8 @@ Log_SetChannel(CPU::CodeCache);
#include "cpu_recompiler_code_generator.h"
#endif
#include <zlib.h>
namespace CPU::CodeCache {
static constexpr bool USE_BLOCK_LINKING = true;
@ -50,6 +53,10 @@ alignas(Recompiler::CODE_STORAGE_ALIGNMENT) static u8
#endif
static JitCodeBuffer s_code_buffer;
#endif
#ifdef WITH_RECOMPILER
static FastMapTable s_fast_map[FAST_MAP_TABLE_COUNT];
static std::unique_ptr<CodeBlock::HostCodePointer[]> s_fast_map_pointers;
@ -253,12 +260,19 @@ void Initialize()
{
Panic("Failed to initialize code space");
}
}
#endif
AllocateFastMap();
#ifdef WITH_RECOMPILER
if (g_settings.IsUsingRecompiler())
{
if (g_settings.IsUsingFastmem() && !InitializeFastmem())
Panic("Failed to initialize fastmem");
AllocateFastMap();
CompileDispatcher();
ResetFastMap();
}
@ -293,22 +307,13 @@ void Shutdown()
}
template<PGXPMode pgxp_mode>
static void ExecuteImpl()
[[noreturn]] static void ExecuteImpl()
{
CodeBlockKey next_block_key;
g_using_interpreter = false;
g_state.frame_done = false;
while (!g_state.frame_done)
for (;;)
{
if (HasPendingInterrupt())
{
SafeReadInstruction(g_state.regs.pc, &g_state.next_instruction.bits);
DispatchInterrupt();
}
TimingEvents::UpdateCPUDowncount();
TimingEvents::RunEvents();
next_block_key = GetNextBlockKey();
while (g_state.pending_ticks < g_state.downcount)
@ -384,27 +389,10 @@ static void ExecuteImpl()
}
}
}
TimingEvents::RunEvents();
}
// in case we switch to interpreter...
g_state.regs.npc = g_state.regs.pc;
}
void Execute()
{
if (g_settings.gpu_pgxp_enable)
{
if (g_settings.gpu_pgxp_cpu)
ExecuteImpl<PGXPMode::CPU>();
else
ExecuteImpl<PGXPMode::Memory>();
}
else
{
ExecuteImpl<PGXPMode::Disabled>();
}
g_state.npc = g_state.pc;
}
#ifdef WITH_RECOMPILER
@ -430,21 +418,15 @@ FastMapTable* GetFastMapPointer()
return s_fast_map;
}
void ExecuteRecompiler()
[[noreturn]] static void ExecuteRecompiler()
{
g_using_interpreter = false;
g_state.frame_done = false;
#if 0
while (!g_state.frame_done)
for (;;)
{
if (HasPendingInterrupt())
{
SafeReadInstruction(g_state.regs.pc, &g_state.next_instruction.bits);
DispatchInterrupt();
}
TimingEvents::UpdateCPUDowncount();
TimingEvents::RunEvents();
while (g_state.pending_ticks < g_state.downcount)
{
@ -452,18 +434,50 @@ void ExecuteRecompiler()
LogCurrentState();
#endif
const u32 pc = g_state.regs.pc;
const u32 pc = g_state.pc;
s_single_block_asm_dispatcher(s_fast_map[pc >> 16][pc >> 2]);
}
TimingEvents::RunEvents();
}
#else
s_asm_dispatcher();
#endif
}
// in case we switch to interpreter...
g_state.regs.npc = g_state.regs.pc;
#endif
/// Entry point for code-cache based execution. Hands control to the recompiler when that execution mode is
/// selected (and compiled in); every other mode runs the cached-interpreter loop specialised for the current
/// PGXP settings. Each callee is declared [[noreturn]], so control never comes back here.
[[noreturn]] void Execute()
{
#ifdef WITH_RECOMPILER
  if (g_settings.cpu_execution_mode == CPUExecutionMode::Recompiler)
    ExecuteRecompiler();
#endif

  // Choose the template instantiation matching the PGXP configuration.
  if (!g_settings.gpu_pgxp_enable)
    ExecuteImpl<PGXPMode::Disabled>();
  else if (g_settings.gpu_pgxp_cpu)
    ExecuteImpl<PGXPMode::CPU>();
  else
    ExecuteImpl<PGXPMode::Memory>();
}
#if defined(WITH_RECOMPILER)
/// Returns a reference to the file-level JIT code buffer (s_code_buffer).
/// Only available when the recompiler is compiled in.
JitCodeBuffer& GetCodeBuffer()
{
return s_code_buffer;
}
#endif
@ -473,13 +487,14 @@ void Reinitialize()
ClearState();
#ifdef WITH_RECOMPILER
ShutdownFastmem();
#endif
#if defined(WITH_RECOMPILER)
s_code_buffer.Destroy();
if (g_settings.IsUsingRecompiler())
{
#ifdef USE_STATIC_CODE_BUFFER
if (!s_code_buffer.Initialize(s_code_storage, sizeof(s_code_storage), RECOMPILER_FAR_CODE_CACHE_SIZE,
RECOMPILER_GUARD_SIZE))
@ -489,7 +504,12 @@ void Reinitialize()
{
Panic("Failed to initialize code space");
}
}
#endif
#ifdef WITH_RECOMPILER
if (g_settings.IsUsingRecompiler())
{
if (g_settings.IsUsingFastmem() && !InitializeFastmem())
Panic("Failed to initialize fastmem");
@ -509,25 +529,40 @@ void Flush()
#endif
}
#ifndef _MSC_VER
// No-op definition of __debugbreak for builds where _MSC_VER is not defined.
// NOTE(review): presumably a stand-in for the MSVC intrinsic so the `#if 0` debugging snippets that call
// __debugbreak() still compile when enabled on other compilers - confirm.
void __debugbreak() {}
#endif
void LogCurrentState()
{
#if 0
if ((TimingEvents::GetGlobalTickCounter() + GetPendingTicks()) == 2546728915)
__debugbreak();
#endif
#if 0
if ((TimingEvents::GetGlobalTickCounter() + GetPendingTicks()) < 2546729174)
return;
#endif
const auto& regs = g_state.regs;
WriteToExecutionLog("tick=%u pc=%08X zero=%08X at=%08X v0=%08X v1=%08X a0=%08X a1=%08X a2=%08X a3=%08X t0=%08X "
"t1=%08X t2=%08X t3=%08X t4=%08X t5=%08X t6=%08X t7=%08X s0=%08X s1=%08X s2=%08X s3=%08X s4=%08X "
"s5=%08X s6=%08X s7=%08X t8=%08X t9=%08X k0=%08X k1=%08X gp=%08X sp=%08X fp=%08X ra=%08X ldr=%s "
"ldv=%08X\n",
TimingEvents::GetGlobalTickCounter() + GetPendingTicks(), regs.pc, regs.zero, regs.at, regs.v0,
regs.v1, regs.a0, regs.a1, regs.a2, regs.a3, regs.t0, regs.t1, regs.t2, regs.t3, regs.t4, regs.t5,
regs.t6, regs.t7, regs.s0, regs.s1, regs.s2, regs.s3, regs.s4, regs.s5, regs.s6, regs.s7, regs.t8,
regs.t9, regs.k0, regs.k1, regs.gp, regs.sp, regs.fp, regs.ra,
(g_state.next_load_delay_reg == Reg::count) ? "NONE" : GetRegName(g_state.next_load_delay_reg),
(g_state.next_load_delay_reg == Reg::count) ? 0 : g_state.next_load_delay_value);
WriteToExecutionLog(
"tick=%u dc=%u/%u pc=%08X at=%08X v0=%08X v1=%08X a0=%08X a1=%08X a2=%08X a3=%08X t0=%08X "
"t1=%08X t2=%08X t3=%08X t4=%08X t5=%08X t6=%08X t7=%08X s0=%08X s1=%08X s2=%08X s3=%08X s4=%08X "
"s5=%08X s6=%08X s7=%08X t8=%08X t9=%08X k0=%08X k1=%08X gp=%08X sp=%08X fp=%08X ra=%08X ldr=%s "
"ldv=%08X cause=%08X sr=%08X gte=%08X\n",
TimingEvents::GetGlobalTickCounter() + GetPendingTicks(), g_state.pending_ticks, g_state.downcount, g_state.pc,
regs.at, regs.v0, regs.v1, regs.a0, regs.a1, regs.a2, regs.a3, regs.t0, regs.t1, regs.t2, regs.t3, regs.t4, regs.t5,
regs.t6, regs.t7, regs.s0, regs.s1, regs.s2, regs.s3, regs.s4, regs.s5, regs.s6, regs.s7, regs.t8, regs.t9, regs.k0,
regs.k1, regs.gp, regs.sp, regs.fp, regs.ra,
(g_state.next_load_delay_reg == Reg::count) ? "NONE" : GetRegName(g_state.next_load_delay_reg),
(g_state.next_load_delay_reg == Reg::count) ? 0 : g_state.next_load_delay_value, g_state.cop0_regs.cause.bits,
g_state.cop0_regs.sr.bits, static_cast<u32>(crc32(0, (const Bytef*)&g_state.gte_regs, sizeof(g_state.gte_regs))));
}
CodeBlockKey GetNextBlockKey()
{
CodeBlockKey key = {};
key.SetPC(g_state.regs.pc);
key.SetPC(g_state.pc);
key.user_mode = InUserMode();
return key;
}
@ -836,7 +871,7 @@ void FastCompileBlockFunction()
void InvalidCodeFunction()
{
Log_ErrorPrintf("Trying to execute invalid code at 0x%08X", g_state.regs.pc);
Log_ErrorPrintf("Trying to execute invalid code at 0x%08X", g_state.pc);
if (g_settings.gpu_pgxp_enable)
{
if (g_settings.gpu_pgxp_cpu)
@ -1249,7 +1284,7 @@ void CPU::Recompiler::Thunks::ResolveBranch(CodeBlock* block, void* host_pc, voi
void CPU::Recompiler::Thunks::LogPC(u32 pc)
{
#if 0
#if 1
CPU::CodeCache::LogCurrentState();
#endif
#if 0

View File

@ -121,14 +121,17 @@ using FastMapTable = CodeBlock::HostCodePointer*;
void Initialize();
void Shutdown();
void Execute();
[[noreturn]] void Execute();
#ifdef WITH_RECOMPILER
using DispatcherFunction = void (*)();
using SingleBlockDispatcherFunction = void (*)(const CodeBlock::HostCodePointer);
FastMapTable* GetFastMapPointer();
void ExecuteRecompiler();
#endif
#if defined(WITH_RECOMPILER)
JitCodeBuffer& GetCodeBuffer();
#endif
/// Flushes the code cache, forcing all blocks to be recompiled.

View File

@ -4,6 +4,7 @@
#include "cpu_core.h"
#include "bus.h"
#include "common/align.h"
#include "common/fastjmp.h"
#include "common/file_system.h"
#include "common/log.h"
#include "cpu_core_private.h"
@ -29,9 +30,10 @@ static void Branch(u32 target);
static void FlushPipeline();
State g_state;
bool g_using_interpreter = false;
bool TRACE_EXECUTION = false;
static fastjmp_buf s_jmp_buf;
static std::FILE* s_log_file = nullptr;
static bool s_log_file_opened = false;
static bool s_trace_to_log = false;
@ -41,6 +43,7 @@ static std::vector<Breakpoint> s_breakpoints;
static u32 s_breakpoint_counter = 1;
static u32 s_last_breakpoint_check_pc = INVALID_BREAKPOINT_PC;
static bool s_single_step = false;
static bool s_single_step_done = false;
bool IsTraceEnabled()
{
@ -134,6 +137,7 @@ void Reset()
GTE::Reset();
// TODO: This consumes cycles...
SetPC(RESET_VECTOR);
}
@ -141,7 +145,9 @@ bool DoState(StateWrapper& sw)
{
sw.Do(&g_state.pending_ticks);
sw.Do(&g_state.downcount);
sw.DoArray(g_state.regs.r, countof(g_state.regs.r));
sw.DoArray(g_state.regs.r, static_cast<u32>(Reg::count));
sw.Do(&g_state.pc);
sw.Do(&g_state.npc);
sw.Do(&g_state.cop0_regs.BPC);
sw.Do(&g_state.cop0_regs.BDA);
sw.Do(&g_state.cop0_regs.TAR);
@ -161,11 +167,23 @@ bool DoState(StateWrapper& sw)
sw.Do(&g_state.next_instruction_is_branch_delay_slot);
sw.Do(&g_state.branch_was_taken);
sw.Do(&g_state.exception_raised);
sw.Do(&g_state.interrupt_delay);
if (sw.GetVersion() < 59)
{
bool interrupt_delay;
sw.Do(&interrupt_delay);
}
sw.Do(&g_state.load_delay_reg);
sw.Do(&g_state.load_delay_value);
sw.Do(&g_state.next_load_delay_reg);
sw.Do(&g_state.next_load_delay_value);
// Compatibility with old states (version < 59): clamp both load delay register indices to Reg::count, the
// value the rest of this file uses to mean "no load pending". Each field must be clamped from its own
// loaded value; deriving next_load_delay_reg from load_delay_reg would discard the saved next-load register.
if (sw.GetVersion() < 59)
{
  g_state.load_delay_reg =
    static_cast<Reg>(std::min(static_cast<u8>(g_state.load_delay_reg), static_cast<u8>(Reg::count)));
  g_state.next_load_delay_reg =
    static_cast<Reg>(std::min(static_cast<u8>(g_state.next_load_delay_reg), static_cast<u8>(Reg::count)));
}
sw.Do(&g_state.cache_control.bits);
sw.DoBytes(g_state.dcache.data(), g_state.dcache.size());
@ -203,7 +221,7 @@ void UpdateFastmemBase()
ALWAYS_INLINE_RELEASE void SetPC(u32 new_pc)
{
DebugAssert(Common::IsAlignedPow2(new_pc, 4));
g_state.regs.npc = new_pc;
g_state.npc = new_pc;
FlushPipeline();
}
@ -217,7 +235,7 @@ ALWAYS_INLINE_RELEASE void Branch(u32 target)
return;
}
g_state.regs.npc = target;
g_state.npc = target;
g_state.branch_was_taken = true;
}
@ -257,14 +275,14 @@ ALWAYS_INLINE_RELEASE static void RaiseException(u32 CAUSE_bits, u32 EPC, u32 ve
// TAR is set to the address which was being fetched in this instruction, or the next instruction to execute if the
// exception hadn't occurred in the delay slot.
g_state.cop0_regs.EPC -= UINT32_C(4);
g_state.cop0_regs.TAR = g_state.regs.pc;
g_state.cop0_regs.TAR = g_state.pc;
}
// current -> previous, switch to kernel mode and disable interrupts
g_state.cop0_regs.sr.mode_bits <<= 2;
// flush the pipeline - we don't want to execute the previously fetched instruction
g_state.regs.npc = vector;
g_state.npc = vector;
g_state.exception_raised = true;
FlushPipeline();
}
@ -299,7 +317,7 @@ void RaiseBreakException(u32 CAUSE_bits, u32 EPC, u32 instruction_bits)
if (PCDrv::HandleSyscall(instruction_bits, g_state.regs))
{
// immediately return
g_state.regs.npc = EPC + 4;
g_state.npc = EPC + 4;
FlushPipeline();
return;
}
@ -311,16 +329,7 @@ void RaiseBreakException(u32 CAUSE_bits, u32 EPC, u32 instruction_bits)
void SetExternalInterrupt(u8 bit)
{
g_state.cop0_regs.cause.Ip |= static_cast<u8>(1u << bit);
if (g_settings.cpu_execution_mode == CPUExecutionMode::Interpreter)
{
g_state.interrupt_delay = 1;
}
else
{
g_state.interrupt_delay = 0;
CheckForPendingInterrupt();
}
CheckForPendingInterrupt();
}
void ClearExternalInterrupt(u8 bit)
@ -331,9 +340,7 @@ void ClearExternalInterrupt(u8 bit)
ALWAYS_INLINE_RELEASE static void UpdateLoadDelay()
{
// the old value is needed in case the delay slot instruction overwrites the same register
if (g_state.load_delay_reg != Reg::count)
g_state.regs.r[static_cast<u8>(g_state.load_delay_reg)] = g_state.load_delay_value;
g_state.regs.r[static_cast<u8>(g_state.load_delay_reg)] = g_state.load_delay_value;
g_state.load_delay_reg = g_state.next_load_delay_reg;
g_state.load_delay_value = g_state.next_load_delay_value;
g_state.next_load_delay_reg = Reg::count;
@ -343,16 +350,13 @@ ALWAYS_INLINE_RELEASE static void FlushPipeline()
{
// loads are flushed
g_state.next_load_delay_reg = Reg::count;
if (g_state.load_delay_reg != Reg::count)
{
g_state.regs.r[static_cast<u8>(g_state.load_delay_reg)] = g_state.load_delay_value;
g_state.load_delay_reg = Reg::count;
}
g_state.regs.r[static_cast<u8>(g_state.load_delay_reg)] = g_state.load_delay_value;
g_state.load_delay_reg = Reg::count;
// not in a branch delay slot
g_state.branch_was_taken = false;
g_state.next_instruction_is_branch_delay_slot = false;
g_state.current_instruction_pc = g_state.regs.pc;
g_state.current_instruction_pc = g_state.pc;
// prefetch the next instruction
FetchInstruction();
@ -649,8 +653,8 @@ const std::array<DebuggerRegisterListEntry, NUM_DEBUGGER_REGISTER_LIST_ENTRIES>
{"ra", &CPU::g_state.regs.ra},
{"hi", &CPU::g_state.regs.hi},
{"lo", &CPU::g_state.regs.lo},
{"pc", &CPU::g_state.regs.pc},
{"npc", &CPU::g_state.regs.npc},
{"pc", &CPU::g_state.pc},
{"npc", &CPU::g_state.npc},
{"COP0_SR", &CPU::g_state.cop0_regs.sr.bits},
{"COP0_CAUSE", &CPU::g_state.cop0_regs.cause.bits},
@ -1111,7 +1115,7 @@ restart_instruction:
{
g_state.next_instruction_is_branch_delay_slot = true;
const u32 target = ReadReg(inst.r.rs);
WriteReg(inst.r.rd, g_state.regs.npc);
WriteReg(inst.r.rd, g_state.npc);
Branch(target);
}
break;
@ -1267,7 +1271,7 @@ restart_instruction:
WriteRegDelayed(inst.i.rt, sxvalue);
if constexpr (pgxp_mode >= PGXPMode::Memory)
PGXP::CPU_LBx(inst.bits, sxvalue, addr);
PGXP::CPU_LBx(inst.bits, addr, sxvalue);
}
break;
@ -1285,7 +1289,7 @@ restart_instruction:
WriteRegDelayed(inst.i.rt, sxvalue);
if constexpr (pgxp_mode >= PGXPMode::Memory)
PGXP::CPU_LHx(inst.bits, sxvalue, addr);
PGXP::CPU_LHx(inst.bits, addr, sxvalue);
}
break;
@ -1302,7 +1306,7 @@ restart_instruction:
WriteRegDelayed(inst.i.rt, value);
if constexpr (pgxp_mode >= PGXPMode::Memory)
PGXP::CPU_LW(inst.bits, value, addr);
PGXP::CPU_LW(inst.bits, addr, value);
}
break;
@ -1320,7 +1324,7 @@ restart_instruction:
WriteRegDelayed(inst.i.rt, zxvalue);
if constexpr (pgxp_mode >= PGXPMode::Memory)
PGXP::CPU_LBx(inst.bits, zxvalue, addr);
PGXP::CPU_LBx(inst.bits, addr, zxvalue);
}
break;
@ -1338,7 +1342,7 @@ restart_instruction:
WriteRegDelayed(inst.i.rt, zxvalue);
if constexpr (pgxp_mode >= PGXPMode::Memory)
PGXP::CPU_LHx(inst.bits, zxvalue, addr);
PGXP::CPU_LHx(inst.bits, addr, zxvalue);
}
break;
@ -1372,7 +1376,7 @@ restart_instruction:
WriteRegDelayed(inst.i.rt, new_value);
if constexpr (pgxp_mode >= PGXPMode::Memory)
PGXP::CPU_LW(inst.bits, new_value, addr);
PGXP::CPU_LW(inst.bits, addr, new_value);
}
break;
@ -1386,7 +1390,7 @@ restart_instruction:
WriteMemoryByte(addr, value);
if constexpr (pgxp_mode >= PGXPMode::Memory)
PGXP::CPU_SB(inst.bits, Truncate8(value), addr);
PGXP::CPU_SB(inst.bits, addr, value);
}
break;
@ -1400,7 +1404,7 @@ restart_instruction:
WriteMemoryHalfWord(addr, value);
if constexpr (pgxp_mode >= PGXPMode::Memory)
PGXP::CPU_SH(inst.bits, Truncate16(value), addr);
PGXP::CPU_SH(inst.bits, addr, value);
}
break;
@ -1414,7 +1418,7 @@ restart_instruction:
WriteMemoryWord(addr, value);
if constexpr (pgxp_mode >= PGXPMode::Memory)
PGXP::CPU_SW(inst.bits, value, addr);
PGXP::CPU_SW(inst.bits, addr, value);
}
break;
@ -1447,22 +1451,22 @@ restart_instruction:
WriteMemoryWord(aligned_addr, new_value);
if constexpr (pgxp_mode >= PGXPMode::Memory)
PGXP::CPU_SW(inst.bits, new_value, addr);
PGXP::CPU_SW(inst.bits, aligned_addr, new_value);
}
break;
case InstructionOp::j:
{
g_state.next_instruction_is_branch_delay_slot = true;
Branch((g_state.regs.pc & UINT32_C(0xF0000000)) | (inst.j.target << 2));
Branch((g_state.pc & UINT32_C(0xF0000000)) | (inst.j.target << 2));
}
break;
case InstructionOp::jal:
{
WriteReg(Reg::ra, g_state.regs.npc);
WriteReg(Reg::ra, g_state.npc);
g_state.next_instruction_is_branch_delay_slot = true;
Branch((g_state.regs.pc & UINT32_C(0xF0000000)) | (inst.j.target << 2));
Branch((g_state.pc & UINT32_C(0xF0000000)) | (inst.j.target << 2));
}
break;
@ -1472,7 +1476,7 @@ restart_instruction:
g_state.next_instruction_is_branch_delay_slot = true;
const bool branch = (ReadReg(inst.i.rs) == ReadReg(inst.i.rt));
if (branch)
Branch(g_state.regs.pc + (inst.i.imm_sext32() << 2));
Branch(g_state.pc + (inst.i.imm_sext32() << 2));
}
break;
@ -1481,7 +1485,7 @@ restart_instruction:
g_state.next_instruction_is_branch_delay_slot = true;
const bool branch = (ReadReg(inst.i.rs) != ReadReg(inst.i.rt));
if (branch)
Branch(g_state.regs.pc + (inst.i.imm_sext32() << 2));
Branch(g_state.pc + (inst.i.imm_sext32() << 2));
}
break;
@ -1490,7 +1494,7 @@ restart_instruction:
g_state.next_instruction_is_branch_delay_slot = true;
const bool branch = (static_cast<s32>(ReadReg(inst.i.rs)) > 0);
if (branch)
Branch(g_state.regs.pc + (inst.i.imm_sext32() << 2));
Branch(g_state.pc + (inst.i.imm_sext32() << 2));
}
break;
@ -1499,7 +1503,7 @@ restart_instruction:
g_state.next_instruction_is_branch_delay_slot = true;
const bool branch = (static_cast<s32>(ReadReg(inst.i.rs)) <= 0);
if (branch)
Branch(g_state.regs.pc + (inst.i.imm_sext32() << 2));
Branch(g_state.pc + (inst.i.imm_sext32() << 2));
}
break;
@ -1515,10 +1519,10 @@ restart_instruction:
// register is still linked even if the branch isn't taken
const bool link = (rt & u8(0x1E)) == u8(0x10);
if (link)
WriteReg(Reg::ra, g_state.regs.npc);
WriteReg(Reg::ra, g_state.npc);
if (branch)
Branch(g_state.regs.pc + (inst.i.imm_sext32() << 2));
Branch(g_state.pc + (inst.i.imm_sext32() << 2));
}
break;
@ -1610,7 +1614,7 @@ restart_instruction:
WriteRegDelayed(inst.r.rt, value);
if constexpr (pgxp_mode >= PGXPMode::Memory)
PGXP::CPU_CFC2(inst.bits, value, value);
PGXP::CPU_MFC2(inst.bits, value);
}
break;
@ -1620,7 +1624,7 @@ restart_instruction:
GTE::WriteRegister(static_cast<u32>(inst.r.rd.GetValue()) + 32, value);
if constexpr (pgxp_mode >= PGXPMode::Memory)
PGXP::CPU_CTC2(inst.bits, value, value);
PGXP::CPU_MTC2(inst.bits, value);
}
break;
@ -1630,7 +1634,7 @@ restart_instruction:
WriteRegDelayed(inst.r.rt, value);
if constexpr (pgxp_mode >= PGXPMode::Memory)
PGXP::CPU_MFC2(inst.bits, value, value);
PGXP::CPU_MFC2(inst.bits, value);
}
break;
@ -1640,7 +1644,7 @@ restart_instruction:
GTE::WriteRegister(static_cast<u32>(inst.r.rd.GetValue()), value);
if constexpr (pgxp_mode >= PGXPMode::Memory)
PGXP::CPU_MTC2(inst.bits, value, value);
PGXP::CPU_MTC2(inst.bits, value);
}
break;
@ -1674,7 +1678,7 @@ restart_instruction:
GTE::WriteRegister(ZeroExtend32(static_cast<u8>(inst.i.rt.GetValue())), value);
if constexpr (pgxp_mode >= PGXPMode::Memory)
PGXP::CPU_LWC2(inst.bits, value, addr);
PGXP::CPU_LWC2(inst.bits, addr, value);
}
break;
@ -1694,7 +1698,7 @@ restart_instruction:
WriteMemoryWord(addr, value);
if constexpr (pgxp_mode >= PGXPMode::Memory)
PGXP::CPU_SWC2(inst.bits, value, addr);
PGXP::CPU_SWC2(inst.bits, addr, value);
}
break;
@ -1734,7 +1738,7 @@ void DispatchInterrupt()
{
// If the instruction we're about to execute is a GTE instruction, delay dispatching the interrupt until the next
// instruction. For some reason, if we don't do this, we end up with incorrectly sorted polygons and flickering..
SafeReadInstruction(g_state.regs.pc, &g_state.next_instruction.bits);
SafeReadInstruction(g_state.pc, &g_state.next_instruction.bits);
if (g_state.next_instruction.op == InstructionOp::cop2 && !g_state.next_instruction.cop.IsCommonInstruction())
{
StallUntilGTEComplete();
@ -1745,7 +1749,10 @@ void DispatchInterrupt()
RaiseException(
Cop0Registers::CAUSE::MakeValueForException(Exception::INT, g_state.next_instruction_is_branch_delay_slot,
g_state.branch_was_taken, g_state.next_instruction.cop.cop_n),
g_state.regs.pc);
g_state.pc);
// Fix up downcount, the pending IRQ set it to zero.
TimingEvents::UpdateCPUDowncount();
}
void UpdateDebugDispatcherFlag()
@ -1763,14 +1770,16 @@ void UpdateDebugDispatcherFlag()
Log_DevPrintf("%s debug dispatcher", use_debug_dispatcher ? "Now using" : "No longer using");
g_state.use_debug_dispatcher = use_debug_dispatcher;
ForceDispatcherExit();
ExitExecution();
}
void ForceDispatcherExit()
void ExitExecution()
{
// zero the downcount so we break out and switch
g_state.downcount = 0;
g_state.frame_done = true;
// can't exit while running events without messing things up
if (TimingEvents::IsRunningEvents())
TimingEvents::SetFrameDone();
else
fastjmp_jmp(&s_jmp_buf, 1);
}
bool HasAnyBreakpoints()
@ -1869,7 +1878,7 @@ void ClearBreakpoints()
bool AddStepOverBreakpoint()
{
u32 bp_pc = g_state.regs.pc;
u32 bp_pc = g_state.pc;
Instruction inst;
if (!SafeReadInstruction(bp_pc, &inst.bits))
@ -1880,7 +1889,7 @@ bool AddStepOverBreakpoint()
if (!IsCallInstruction(inst))
{
Host::ReportFormattedDebuggerMessage(Host::TranslateString("DebuggerMessage", "0x%08X is not a call instruction."),
g_state.regs.pc);
g_state.pc);
return false;
}
@ -1890,7 +1899,7 @@ bool AddStepOverBreakpoint()
if (IsBranchInstruction(inst))
{
Host::ReportFormattedDebuggerMessage(
Host::TranslateString("DebuggerMessage", "Can't step over double branch at 0x%08X"), g_state.regs.pc);
Host::TranslateString("DebuggerMessage", "Can't step over double branch at 0x%08X"), g_state.pc);
return false;
}
@ -1905,7 +1914,7 @@ bool AddStepOverBreakpoint()
bool AddStepOutBreakpoint(u32 max_instructions_to_search)
{
// find the branch-to-ra instruction.
u32 ret_pc = g_state.regs.pc;
u32 ret_pc = g_state.pc;
for (u32 i = 0; i < max_instructions_to_search; i++)
{
ret_pc += sizeof(Instruction);
@ -1929,21 +1938,24 @@ bool AddStepOutBreakpoint(u32 max_instructions_to_search)
Host::ReportFormattedDebuggerMessage(
Host::TranslateString("DebuggerMessage", "No return instruction found after %u instructions for step-out at %08X."),
max_instructions_to_search, g_state.regs.pc);
max_instructions_to_search, g_state.pc);
return false;
}
ALWAYS_INLINE_RELEASE static bool BreakpointCheck()
{
const u32 pc = g_state.regs.pc;
const u32 pc = g_state.pc;
// single step - we want to break out after this instruction, so set a pending exit
// the bp check happens just before execution, so this is fine
if (s_single_step)
{
ForceDispatcherExit();
s_single_step = false;
if (s_single_step_done)
ExitExecution();
else
s_single_step_done = true;
s_last_breakpoint_check_pc = pc;
return false;
}
@ -2004,19 +2016,14 @@ ALWAYS_INLINE_RELEASE static bool BreakpointCheck()
}
template<PGXPMode pgxp_mode, bool debug>
static void ExecuteImpl()
[[noreturn]] static void ExecuteImpl()
{
g_using_interpreter = true;
g_state.frame_done = false;
while (!g_state.frame_done)
for (;;)
{
TimingEvents::UpdateCPUDowncount();
TimingEvents::RunEvents();
while (g_state.pending_ticks < g_state.downcount)
{
if (HasPendingInterrupt() && !g_state.interrupt_delay)
DispatchInterrupt();
if constexpr (debug)
{
Cop0ExecutionBreakpointCheck();
@ -2028,12 +2035,11 @@ static void ExecuteImpl()
}
}
g_state.interrupt_delay = false;
g_state.pending_ticks++;
// now executing the instruction we previously fetched
g_state.current_instruction.bits = g_state.next_instruction.bits;
g_state.current_instruction_pc = g_state.regs.pc;
g_state.current_instruction_pc = g_state.pc;
g_state.current_instruction_in_branch_delay_slot = g_state.next_instruction_is_branch_delay_slot;
g_state.current_instruction_was_branch_taken = g_state.branch_was_taken;
g_state.next_instruction_is_branch_delay_slot = false;
@ -2065,27 +2071,10 @@ static void ExecuteImpl()
// next load delay
UpdateLoadDelay();
}
TimingEvents::RunEvents();
}
}
void Execute()
{
if (g_settings.gpu_pgxp_enable)
{
if (g_settings.gpu_pgxp_cpu)
ExecuteImpl<PGXPMode::CPU, false>();
else
ExecuteImpl<PGXPMode::Memory, false>();
}
else
{
ExecuteImpl<PGXPMode::Disabled, false>();
}
}
void ExecuteDebug()
static void ExecuteDebug()
{
if (g_settings.gpu_pgxp_enable)
{
@ -2100,11 +2089,56 @@ void ExecuteDebug()
}
}
// Runs the CPU using the dispatcher selected by the debug flag and the configured execution mode.
// fastjmp_set() registers s_jmp_buf, which ExitExecution() targets with fastjmp_jmp(&s_jmp_buf, 1); a non-zero
// result below therefore indicates that execution was left through that jump.
// NOTE(review): assumes setjmp/longjmp-style semantics for fastjmp_set/fastjmp_jmp - confirm in common/fastjmp.h.
void Execute()
{
// Captured before fastjmp_set(); the exit path below tests these copies rather than re-reading
// g_settings / g_state.
const CPUExecutionMode exec_mode = g_settings.cpu_execution_mode;
const bool use_debug_dispatcher = g_state.use_debug_dispatcher;
if (fastjmp_set(&s_jmp_buf) != 0)
{
// Before we return, set npc to pc so that we can switch from recs to int.
if (exec_mode != CPUExecutionMode::Interpreter && !use_debug_dispatcher)
g_state.npc = g_state.pc;
return;
}
// The debug dispatcher is checked first, regardless of the configured execution mode.
if (use_debug_dispatcher)
{
ExecuteDebug();
return;
}
switch (exec_mode)
{
// Both code-cache backed modes are routed through CodeCache::Execute().
case CPUExecutionMode::Recompiler:
case CPUExecutionMode::CachedInterpreter:
CodeCache::Execute();
break;
// Plain interpreter (also the fallback for any other value): pick the ExecuteImpl instantiation that
// matches the PGXP settings, with the debug template argument set to false.
case CPUExecutionMode::Interpreter:
default:
{
if (g_settings.gpu_pgxp_enable)
{
if (g_settings.gpu_pgxp_cpu)
ExecuteImpl<PGXPMode::CPU, false>();
else
ExecuteImpl<PGXPMode::Memory, false>();
}
else
{
ExecuteImpl<PGXPMode::Disabled, false>();
}
}
break;
}
}
void SingleStep()
{
s_single_step = true;
ExecuteDebug();
Host::ReportFormattedDebuggerMessage("Stepped to 0x%08X.", g_state.regs.pc);
if (fastjmp_set(&s_jmp_buf) == 0)
ExecuteDebug();
Host::ReportFormattedDebuggerMessage("Stepped to 0x%08X.", g_state.pc);
}
namespace CodeCache {
@ -2113,8 +2147,8 @@ template<PGXPMode pgxp_mode>
void InterpretCachedBlock(const CodeBlock& block)
{
// set up the state so we've already fetched the instruction
DebugAssert(g_state.regs.pc == block.GetPC());
g_state.regs.npc = block.GetPC() + 4;
DebugAssert(g_state.pc == block.GetPC());
g_state.npc = block.GetPC() + 4;
for (const CodeBlockInstruction& cbi : block.instructions)
{
@ -2129,8 +2163,8 @@ void InterpretCachedBlock(const CodeBlock& block)
g_state.exception_raised = false;
// update pc
g_state.regs.pc = g_state.regs.npc;
g_state.regs.npc += 4;
g_state.pc = g_state.npc;
g_state.npc += 4;
// execute the instruction we previously fetched
ExecuteInstruction<pgxp_mode, false>();
@ -2153,7 +2187,7 @@ template void InterpretCachedBlock<PGXPMode::CPU>(const CodeBlock& block);
template<PGXPMode pgxp_mode>
void InterpretUncachedBlock()
{
g_state.regs.npc = g_state.regs.pc;
g_state.npc = g_state.pc;
if (!FetchInstructionForInterpreterFallback())
return;
@ -2166,7 +2200,7 @@ void InterpretUncachedBlock()
// now executing the instruction we previously fetched
g_state.current_instruction.bits = g_state.next_instruction.bits;
g_state.current_instruction_pc = g_state.regs.pc;
g_state.current_instruction_pc = g_state.pc;
g_state.current_instruction_in_branch_delay_slot = g_state.next_instruction_is_branch_delay_slot;
g_state.current_instruction_was_branch_taken = g_state.branch_was_taken;
g_state.next_instruction_is_branch_delay_slot = false;
@ -2182,7 +2216,7 @@ void InterpretUncachedBlock()
}
else
{
g_state.regs.pc = g_state.regs.npc;
g_state.pc = g_state.npc;
}
// execute the instruction we previously fetched

View File

@ -56,7 +56,9 @@ struct State
Registers regs = {};
Cop0Registers cop0_regs = {};
Instruction next_instruction = {};
u32 pc; // at execution time: the address of the next instruction to execute (already fetched)
u32 npc; // at execution time: the address of the next instruction to fetch
// address of the instruction currently being executed
Instruction current_instruction = {};
@ -66,15 +68,14 @@ struct State
bool next_instruction_is_branch_delay_slot = false;
bool branch_was_taken = false;
bool exception_raised = false;
bool interrupt_delay = false;
bool frame_done = false;
// load delays
Reg load_delay_reg = Reg::count;
u32 load_delay_value = 0;
Reg next_load_delay_reg = Reg::count;
u32 load_delay_value = 0;
u32 next_load_delay_value = 0;
Instruction next_instruction = {};
CacheControl cache_control{0};
// GTE registers are stored here so we can access them on ARM with a single instruction
@ -95,7 +96,6 @@ struct State
};
extern State g_state;
extern bool g_using_interpreter;
void Initialize();
void Shutdown();
@ -106,38 +106,37 @@ void UpdateFastmemBase();
/// Executes interpreter loop.
void Execute();
void ExecuteDebug();
void SingleStep();
// Forces an early exit from the CPU dispatcher.
void ForceDispatcherExit();
void ExitExecution();
ALWAYS_INLINE Registers& GetRegs()
ALWAYS_INLINE static Registers& GetRegs()
{
return g_state.regs;
}
ALWAYS_INLINE TickCount GetPendingTicks()
ALWAYS_INLINE static TickCount GetPendingTicks()
{
return g_state.pending_ticks;
}
ALWAYS_INLINE void ResetPendingTicks()
ALWAYS_INLINE static void ResetPendingTicks()
{
g_state.gte_completion_tick =
(g_state.pending_ticks < g_state.gte_completion_tick) ? (g_state.gte_completion_tick - g_state.pending_ticks) : 0;
g_state.pending_ticks = 0;
}
ALWAYS_INLINE void AddPendingTicks(TickCount ticks)
ALWAYS_INLINE static void AddPendingTicks(TickCount ticks)
{
g_state.pending_ticks += ticks;
}
// state helpers
ALWAYS_INLINE bool InUserMode()
ALWAYS_INLINE static bool InUserMode()
{
return g_state.cop0_regs.sr.KUc;
}
ALWAYS_INLINE bool InKernelMode()
ALWAYS_INLINE static bool InKernelMode()
{
return !g_state.cop0_regs.sr.KUc;
}

View File

@ -12,13 +12,13 @@ void RaiseException(Exception excode);
void RaiseException(u32 CAUSE_bits, u32 EPC);
void RaiseBreakException(u32 CAUSE_bits, u32 EPC, u32 instruction_bits);
ALWAYS_INLINE bool HasPendingInterrupt()
ALWAYS_INLINE static bool HasPendingInterrupt()
{
return g_state.cop0_regs.sr.IEc &&
(((g_state.cop0_regs.cause.bits & g_state.cop0_regs.sr.bits) & (UINT32_C(0xFF) << 8)) != 0);
}
ALWAYS_INLINE void CheckForPendingInterrupt()
ALWAYS_INLINE static void CheckForPendingInterrupt()
{
if (HasPendingInterrupt())
g_state.downcount = 0;
@ -28,36 +28,36 @@ void DispatchInterrupt();
void UpdateDebugDispatcherFlag();
// icache stuff
ALWAYS_INLINE bool IsCachedAddress(VirtualMemoryAddress address)
ALWAYS_INLINE static bool IsCachedAddress(VirtualMemoryAddress address)
{
// KUSEG, KSEG0
return (address >> 29) <= 4;
}
ALWAYS_INLINE u32 GetICacheLine(VirtualMemoryAddress address)
ALWAYS_INLINE static u32 GetICacheLine(VirtualMemoryAddress address)
{
return ((address >> 4) & 0xFFu);
}
ALWAYS_INLINE u32 GetICacheLineOffset(VirtualMemoryAddress address)
ALWAYS_INLINE static u32 GetICacheLineOffset(VirtualMemoryAddress address)
{
return (address & (ICACHE_LINE_SIZE - 1));
}
ALWAYS_INLINE u32 GetICacheTagForAddress(VirtualMemoryAddress address)
ALWAYS_INLINE static u32 GetICacheTagForAddress(VirtualMemoryAddress address)
{
return (address & ICACHE_TAG_ADDRESS_MASK);
}
ALWAYS_INLINE u32 GetICacheFillTagForAddress(VirtualMemoryAddress address)
ALWAYS_INLINE static u32 GetICacheFillTagForAddress(VirtualMemoryAddress address)
{
static const u32 invalid_bits[4] = {0, 1, 3, 7};
return GetICacheTagForAddress(address) | invalid_bits[(address >> 2) & 0x03u];
}
ALWAYS_INLINE u32 GetICacheTagMaskForAddress(VirtualMemoryAddress address)
ALWAYS_INLINE static u32 GetICacheTagMaskForAddress(VirtualMemoryAddress address)
{
static const u32 mask[4] = {ICACHE_TAG_ADDRESS_MASK | 1, ICACHE_TAG_ADDRESS_MASK | 2, ICACHE_TAG_ADDRESS_MASK | 4,
ICACHE_TAG_ADDRESS_MASK | 8};
return mask[(address >> 2) & 0x03u];
}
ALWAYS_INLINE bool CompareICacheTag(VirtualMemoryAddress address)
ALWAYS_INLINE static bool CompareICacheTag(VirtualMemoryAddress address)
{
const u32 line = GetICacheLine(address);
return ((g_state.icache_tags[line] & GetICacheTagMaskForAddress(address)) == GetICacheTagForAddress(address));
@ -68,7 +68,7 @@ TickCount GetICacheFillTicks(VirtualMemoryAddress address);
u32 FillICache(VirtualMemoryAddress address);
void CheckAndUpdateICacheTags(u32 line_count, TickCount uncached_ticks);
ALWAYS_INLINE Segment GetSegmentForAddress(VirtualMemoryAddress address)
ALWAYS_INLINE static Segment GetSegmentForAddress(VirtualMemoryAddress address)
{
switch ((address >> 29))
{
@ -91,12 +91,12 @@ ALWAYS_INLINE Segment GetSegmentForAddress(VirtualMemoryAddress address)
}
}
ALWAYS_INLINE PhysicalMemoryAddress VirtualAddressToPhysical(VirtualMemoryAddress address)
ALWAYS_INLINE static constexpr PhysicalMemoryAddress VirtualAddressToPhysical(VirtualMemoryAddress address)
{
return (address & PHYSICAL_MEMORY_ADDRESS_MASK);
}
ALWAYS_INLINE VirtualMemoryAddress PhysicalAddressToVirtual(PhysicalMemoryAddress address, Segment segment)
ALWAYS_INLINE static VirtualMemoryAddress PhysicalAddressToVirtual(PhysicalMemoryAddress address, Segment segment)
{
static constexpr std::array<VirtualMemoryAddress, 4> bases = {{0x00000000, 0x80000000, 0xA0000000, 0xE0000000}};
return bases[static_cast<u32>(segment)] | address;
@ -115,12 +115,12 @@ bool WriteMemoryWord(VirtualMemoryAddress addr, u32 value);
void* GetDirectReadMemoryPointer(VirtualMemoryAddress address, MemoryAccessSize size, TickCount* read_ticks);
void* GetDirectWriteMemoryPointer(VirtualMemoryAddress address, MemoryAccessSize size);
ALWAYS_INLINE void AddGTETicks(TickCount ticks)
ALWAYS_INLINE static void AddGTETicks(TickCount ticks)
{
g_state.gte_completion_tick = g_state.pending_ticks + ticks + 1;
}
ALWAYS_INLINE void StallUntilGTEComplete()
ALWAYS_INLINE static void StallUntilGTEComplete()
{
g_state.pending_ticks =
(g_state.gte_completion_tick > g_state.pending_ticks) ? g_state.gte_completion_tick : g_state.pending_ticks;

View File

@ -1156,7 +1156,7 @@ Value CodeGenerator::GetCurrentInstructionPC(u32 offset /* = 0 */)
void CodeGenerator::WriteNewPC(const Value& value, bool commit)
{
// TODO: This _could_ be moved into the register cache, but would it gain anything?
EmitStoreGuestRegister(Reg::pc, value);
EmitStoreCPUStructField(offsetof(CPU::State, pc), value);
if (commit)
{
m_pc_valid = value.IsConstant();
@ -1450,7 +1450,7 @@ bool CodeGenerator::Compile_Load(const CodeBlockInstruction& cbi)
result = EmitLoadGuestMemory(cbi, address, address_spec, RegSize_8);
ConvertValueSizeInPlace(&result, RegSize_32, (cbi.instruction.op == InstructionOp::lb));
if (g_settings.gpu_pgxp_enable)
EmitFunctionCall(nullptr, PGXP::CPU_LBx, Value::FromConstantU32(cbi.instruction.bits), result, address);
EmitFunctionCall(nullptr, PGXP::CPU_LBx, Value::FromConstantU32(cbi.instruction.bits), address, result);
if (address_spec)
{
@ -1468,7 +1468,7 @@ bool CodeGenerator::Compile_Load(const CodeBlockInstruction& cbi)
ConvertValueSizeInPlace(&result, RegSize_32, (cbi.instruction.op == InstructionOp::lh));
if (g_settings.gpu_pgxp_enable)
EmitFunctionCall(nullptr, PGXP::CPU_LHx, Value::FromConstantU32(cbi.instruction.bits), result, address);
EmitFunctionCall(nullptr, PGXP::CPU_LHx, Value::FromConstantU32(cbi.instruction.bits), address, result);
if (address_spec)
{
@ -1483,7 +1483,7 @@ bool CodeGenerator::Compile_Load(const CodeBlockInstruction& cbi)
{
result = EmitLoadGuestMemory(cbi, address, address_spec, RegSize_32);
if (g_settings.gpu_pgxp_enable)
EmitFunctionCall(nullptr, PGXP::CPU_LW, Value::FromConstantU32(cbi.instruction.bits), result, address);
EmitFunctionCall(nullptr, PGXP::CPU_LW, Value::FromConstantU32(cbi.instruction.bits), address, result);
if (address_spec)
value_spec = SpeculativeReadMemory(*address_spec);
@ -1522,10 +1522,7 @@ bool CodeGenerator::Compile_Store(const CodeBlockInstruction& cbi)
case InstructionOp::sb:
{
if (g_settings.gpu_pgxp_enable)
{
EmitFunctionCall(nullptr, PGXP::CPU_SB, Value::FromConstantU32(cbi.instruction.bits),
value.ViewAsSize(RegSize_8), address);
}
EmitFunctionCall(nullptr, PGXP::CPU_SB, Value::FromConstantU32(cbi.instruction.bits), address, value);
EmitStoreGuestMemory(cbi, address, address_spec, RegSize_8, value);
@ -1553,10 +1550,7 @@ bool CodeGenerator::Compile_Store(const CodeBlockInstruction& cbi)
case InstructionOp::sh:
{
if (g_settings.gpu_pgxp_enable)
{
EmitFunctionCall(nullptr, PGXP::CPU_SH, Value::FromConstantU32(cbi.instruction.bits),
value.ViewAsSize(RegSize_16), address);
}
EmitFunctionCall(nullptr, PGXP::CPU_SH, Value::FromConstantU32(cbi.instruction.bits), address, value);
EmitStoreGuestMemory(cbi, address, address_spec, RegSize_16, value);
@ -1584,7 +1578,7 @@ bool CodeGenerator::Compile_Store(const CodeBlockInstruction& cbi)
case InstructionOp::sw:
{
if (g_settings.gpu_pgxp_enable)
EmitFunctionCall(nullptr, PGXP::CPU_SW, Value::FromConstantU32(cbi.instruction.bits), value, address);
EmitFunctionCall(nullptr, PGXP::CPU_SW, Value::FromConstantU32(cbi.instruction.bits), address, value);
EmitStoreGuestMemory(cbi, address, address_spec, RegSize_32, value);
@ -1688,7 +1682,7 @@ bool CodeGenerator::Compile_LoadLeftRight(const CodeBlockInstruction& cbi)
shift.ReleaseAndClear();
if (g_settings.gpu_pgxp_enable)
EmitFunctionCall(nullptr, PGXP::CPU_LW, Value::FromConstantU32(cbi.instruction.bits), mem, address);
EmitFunctionCall(nullptr, PGXP::CPU_LW, Value::FromConstantU32(cbi.instruction.bits), address, mem);
m_register_cache.WriteGuestRegisterDelayed(cbi.instruction.i.rt, std::move(mem));
@ -1751,7 +1745,7 @@ bool CodeGenerator::Compile_StoreLeftRight(const CodeBlockInstruction& cbi)
EmitStoreGuestMemory(cbi, address, address_spec, RegSize_32, mem);
if (g_settings.gpu_pgxp_enable)
EmitFunctionCall(nullptr, PGXP::CPU_SW, Value::FromConstantU32(cbi.instruction.bits), mem, address);
EmitFunctionCall(nullptr, PGXP::CPU_SW, Value::FromConstantU32(cbi.instruction.bits), address, mem);
InstructionEpilogue(cbi);
return true;
@ -2950,7 +2944,7 @@ bool CodeGenerator::Compile_cop2(const CodeBlockInstruction& cbi)
DoGTERegisterWrite(reg, value);
if (g_settings.gpu_pgxp_enable)
EmitFunctionCall(nullptr, PGXP::CPU_LWC2, Value::FromConstantU32(cbi.instruction.bits), value, address);
EmitFunctionCall(nullptr, PGXP::CPU_LWC2, Value::FromConstantU32(cbi.instruction.bits), address, value);
}
else
{
@ -2958,7 +2952,7 @@ bool CodeGenerator::Compile_cop2(const CodeBlockInstruction& cbi)
EmitStoreGuestMemory(cbi, address, spec_address, RegSize_32, value);
if (g_settings.gpu_pgxp_enable)
EmitFunctionCall(nullptr, PGXP::CPU_SWC2, Value::FromConstantU32(cbi.instruction.bits), value, address);
EmitFunctionCall(nullptr, PGXP::CPU_SWC2, Value::FromConstantU32(cbi.instruction.bits), address, value);
SpeculativeValue spec_base = SpeculativeReadReg(cbi.instruction.i.rs);
if (spec_base)
@ -2988,11 +2982,7 @@ bool CodeGenerator::Compile_cop2(const CodeBlockInstruction& cbi)
// PGXP done first here before ownership is transferred.
if (g_settings.gpu_pgxp_enable)
{
EmitFunctionCall(
nullptr, (cbi.instruction.cop.CommonOp() == CopCommonInstruction::cfcn) ? PGXP::CPU_CFC2 : PGXP::CPU_MFC2,
Value::FromConstantU32(cbi.instruction.bits), value, value);
}
EmitFunctionCall(nullptr, PGXP::CPU_MFC2, Value::FromConstantU32(cbi.instruction.bits), value);
m_register_cache.WriteGuestRegisterDelayed(cbi.instruction.r.rt, std::move(value));
SpeculativeWriteReg(cbi.instruction.r.rt, std::nullopt);
@ -3014,11 +3004,7 @@ bool CodeGenerator::Compile_cop2(const CodeBlockInstruction& cbi)
DoGTERegisterWrite(reg, value);
if (g_settings.gpu_pgxp_enable)
{
EmitFunctionCall(
nullptr, (cbi.instruction.cop.CommonOp() == CopCommonInstruction::ctcn) ? PGXP::CPU_CTC2 : PGXP::CPU_MTC2,
Value::FromConstantU32(cbi.instruction.bits), value, value);
}
EmitFunctionCall(nullptr, PGXP::CPU_MTC2, Value::FromConstantU32(cbi.instruction.bits), value);
InstructionEpilogue(cbi);
return true;

View File

@ -32,9 +32,6 @@ constexpr u32 FUNCTION_CALLER_SAVED_SPACE_RESERVE = 144; // 18 registers -> 224
constexpr u32 FUNCTION_STACK_SIZE =
FUNCTION_CALLEE_SAVED_SPACE_RESERVE + FUNCTION_CALLER_SAVED_SPACE_RESERVE + FUNCTION_CALL_SHADOW_SPACE;
// PC we return to after the end of the block
static void* s_dispatcher_return_address;
static s32 GetPCDisplacement(const void* current, const void* target)
{
Assert(Common::IsAlignedPow2(reinterpret_cast<size_t>(current), 4));
@ -201,10 +198,7 @@ void CodeGenerator::EmitEndBlock(bool free_registers /* = true */, bool emit_ret
m_emit->add(a32::sp, a32::sp, FUNCTION_STACK_SIZE);
if (emit_return)
{
// m_emit->b(GetPCDisplacement(GetCurrentCodePointer(), s_dispatcher_return_address));
m_emit->bx(a32::lr);
}
}
void CodeGenerator::EmitExceptionExit()
@ -219,7 +213,6 @@ void CodeGenerator::EmitExceptionExit()
m_register_cache.PopCalleeSavedRegisters(false);
m_emit->add(a32::sp, a32::sp, FUNCTION_STACK_SIZE);
// m_emit->b(GetPCDisplacement(GetCurrentCodePointer(), s_dispatcher_return_address));
m_emit->bx(a32::lr);
}
@ -2072,64 +2065,16 @@ CodeCache::DispatcherFunction CodeGenerator::CompileDispatcher()
EmitLoadGlobalAddress(RCPUPTR, &g_state);
a32::Label frame_done_loop;
a32::Label exit_dispatcher;
m_emit->Bind(&frame_done_loop);
// if frame_done goto exit_dispatcher
m_emit->ldrb(a32::r0, a32::MemOperand(GetHostReg32(RCPUPTR), offsetof(State, frame_done)));
m_emit->tst(a32::r0, 1);
m_emit->b(a32::ne, &exit_dispatcher);
// r0 <- sr
a32::Label no_interrupt;
m_emit->ldr(a32::r0, a32::MemOperand(GetHostReg32(RCPUPTR), offsetof(State, cop0_regs.sr.bits)));
// if Iec == 0 then goto no_interrupt
m_emit->tst(a32::r0, 1);
m_emit->b(a32::eq, &no_interrupt);
// r1 <- cause
// r0 (sr) & cause
m_emit->ldr(a32::r1, a32::MemOperand(GetHostReg32(RCPUPTR), offsetof(State, cop0_regs.cause.bits)));
m_emit->and_(a32::r0, a32::r0, a32::r1);
// ((sr & cause) & 0xff00) == 0 goto no_interrupt
m_emit->tst(a32::r0, 0xFF00);
m_emit->b(a32::eq, &no_interrupt);
// we have an interrupt
EmitCall(reinterpret_cast<const void*>(&DispatchInterrupt));
// no interrupt or we just serviced it
m_emit->Bind(&no_interrupt);
// TimingEvents::UpdateCPUDowncount:
// r0 <- head event->downcount
// downcount <- r0
EmitLoadGlobalAddress(0, TimingEvents::GetHeadEventPtr());
m_emit->ldr(a32::r0, a32::MemOperand(a32::r0));
m_emit->ldr(a32::r0, a32::MemOperand(a32::r0, offsetof(TimingEvent, m_downcount)));
m_emit->str(a32::r0, a32::MemOperand(GetHostReg32(RCPUPTR), offsetof(State, downcount)));
a32::Label event_test;
m_emit->b(&event_test);
// main dispatch loop
a32::Label main_loop;
m_emit->Bind(&main_loop);
s_dispatcher_return_address = GetCurrentCodePointer();
// r0 <- pending_ticks
// r1 <- downcount
m_emit->ldr(a32::r0, a32::MemOperand(GetHostReg32(RCPUPTR), offsetof(State, pending_ticks)));
m_emit->ldr(a32::r1, a32::MemOperand(GetHostReg32(RCPUPTR), offsetof(State, downcount)));
// while pending_ticks < downcount
a32::Label downcount_hit;
m_emit->cmp(a32::r0, a32::r1);
m_emit->b(a32::ge, &downcount_hit);
// time to lookup the block
// r0 <- pc
m_emit->ldr(a32::r0, a32::MemOperand(GetHostReg32(RCPUPTR), offsetof(State, regs.pc)));
m_emit->ldr(a32::r0, a32::MemOperand(GetHostReg32(RCPUPTR), offsetof(State, pc)));
// r1 <- s_fast_map[pc >> 16]
EmitLoadGlobalAddress(2, CodeCache::GetFastMapPointer());
@ -2140,21 +2085,20 @@ CodeCache::DispatcherFunction CodeGenerator::CompileDispatcher()
m_emit->ldr(a32::r0, a32::MemOperand(a32::r1, a32::r0));
m_emit->blx(a32::r0);
// end while
m_emit->Bind(&downcount_hit);
// check events then for frame done
// r0 <- pending_ticks
// r1 <- downcount
m_emit->ldr(a32::r0, a32::MemOperand(GetHostReg32(RCPUPTR), offsetof(State, pending_ticks)));
EmitLoadGlobalAddress(1, TimingEvents::GetHeadEventPtr());
m_emit->ldr(a32::r1, a32::MemOperand(a32::r1));
m_emit->ldr(a32::r1, a32::MemOperand(a32::r1, offsetof(TimingEvent, m_downcount)));
m_emit->cmp(a32::r0, a32::r1);
m_emit->b(a32::lt, &frame_done_loop);
EmitCall(reinterpret_cast<const void*>(&TimingEvents::RunEvents));
m_emit->b(&frame_done_loop);
m_emit->ldr(a32::r1, a32::MemOperand(GetHostReg32(RCPUPTR), offsetof(State, downcount)));
// all done
m_emit->Bind(&exit_dispatcher);
// while pending_ticks < downcount
a32::Label downcount_hit;
m_emit->cmp(a32::r0, a32::r1);
m_emit->b(a32::lt, &main_loop);
// end while
m_emit->Bind(&event_test);
EmitCall(reinterpret_cast<const void*>(&TimingEvents::RunEvents));
m_emit->b(&main_loop);
RestoreStackAfterCall(stack_adjust);
m_register_cache.PopCalleeSavedRegisters(true);

View File

@ -30,9 +30,6 @@ constexpr u64 FUNCTION_CALLER_SAVED_SPACE_RESERVE = 144; // 18 registers -> 224
constexpr u64 FUNCTION_STACK_SIZE =
FUNCTION_CALLEE_SAVED_SPACE_RESERVE + FUNCTION_CALLER_SAVED_SPACE_RESERVE + FUNCTION_CALL_SHADOW_SPACE;
// PC we return to after the end of the block
static void* s_dispatcher_return_address;
static s64 GetPCDisplacement(const void* current, const void* target)
{
Assert(Common::IsAlignedPow2(reinterpret_cast<size_t>(current), 4));
@ -256,7 +253,6 @@ void CodeGenerator::EmitExceptionExit()
m_register_cache.PopCalleeSavedRegisters(false);
m_emit->Add(a64::sp, a64::sp, FUNCTION_STACK_SIZE);
// m_emit->b(GetPCDisplacement(GetCurrentCodePointer(), s_dispatcher_return_address));
m_emit->Ret();
}
@ -2278,62 +2274,16 @@ CodeCache::DispatcherFunction CodeGenerator::CompileDispatcher()
EmitLoadGlobalAddress(RCPUPTR, &g_state);
a64::Label frame_done_loop;
a64::Label exit_dispatcher;
m_emit->Bind(&frame_done_loop);
// if frame_done goto exit_dispatcher
m_emit->ldrb(a64::w8, a64::MemOperand(GetHostReg64(RCPUPTR), offsetof(State, frame_done)));
m_emit->tbnz(a64::w8, 0, &exit_dispatcher);
// x8 <- sr
a64::Label no_interrupt;
m_emit->ldr(a64::w8, a64::MemOperand(GetHostReg64(RCPUPTR), offsetof(State, cop0_regs.sr.bits)));
// if Iec == 0 then goto no_interrupt
m_emit->tbz(a64::w8, 0, &no_interrupt);
// x9 <- cause
// x8 (sr) & cause
m_emit->ldr(a64::w9, a64::MemOperand(GetHostReg64(RCPUPTR), offsetof(State, cop0_regs.cause.bits)));
m_emit->and_(a64::w8, a64::w8, a64::w9);
// ((sr & cause) & 0xff00) == 0 goto no_interrupt
m_emit->tst(a64::w8, 0xFF00);
m_emit->b(&no_interrupt, a64::eq);
// we have an interrupt
EmitCall(reinterpret_cast<const void*>(&DispatchInterrupt));
// no interrupt or we just serviced it
m_emit->Bind(&no_interrupt);
// TimingEvents::UpdateCPUDowncount:
// x8 <- head event->downcount
// downcount <- x8
EmitLoadGlobalAddress(8, TimingEvents::GetHeadEventPtr());
m_emit->ldr(a64::x8, a64::MemOperand(a64::x8));
m_emit->ldr(a64::w8, a64::MemOperand(a64::x8, offsetof(TimingEvent, m_downcount)));
m_emit->str(a64::w8, a64::MemOperand(GetHostReg64(RCPUPTR), offsetof(State, downcount)));
a64::Label event_test;
m_emit->b(&event_test);
// main dispatch loop
a64::Label main_loop;
m_emit->Bind(&main_loop);
s_dispatcher_return_address = GetCurrentCodePointer();
// w8 <- pending_ticks
// w9 <- downcount
m_emit->ldr(a64::w8, a64::MemOperand(GetHostReg64(RCPUPTR), offsetof(State, pending_ticks)));
m_emit->ldr(a64::w9, a64::MemOperand(GetHostReg64(RCPUPTR), offsetof(State, downcount)));
// while pending_ticks < downcount
a64::Label downcount_hit;
m_emit->cmp(a64::w8, a64::w9);
m_emit->b(&downcount_hit, a64::ge);
// time to lookup the block
// w8 <- pc
m_emit->ldr(a64::w8, a64::MemOperand(GetHostReg64(RCPUPTR), offsetof(State, regs.pc)));
m_emit->ldr(a64::w8, a64::MemOperand(GetHostReg64(RCPUPTR), offsetof(State, pc)));
// x9 <- s_fast_map[pc >> 16]
EmitLoadGlobalAddress(10, CodeCache::GetFastMapPointer());
@ -2345,21 +2295,20 @@ CodeCache::DispatcherFunction CodeGenerator::CompileDispatcher()
m_emit->ldr(a64::x8, a64::MemOperand(a64::x9, a64::x8, a64::LSL, 3));
m_emit->blr(a64::x8);
// end while
m_emit->Bind(&downcount_hit);
// check events then for frame done
// w8 <- pending_ticks
// w9 <- downcount
m_emit->ldr(a64::w8, a64::MemOperand(GetHostReg64(RCPUPTR), offsetof(State, pending_ticks)));
EmitLoadGlobalAddress(9, TimingEvents::GetHeadEventPtr());
m_emit->ldr(a64::x9, a64::MemOperand(a64::x9));
m_emit->ldr(a64::w9, a64::MemOperand(a64::x9, offsetof(TimingEvent, m_downcount)));
m_emit->ldr(a64::w9, a64::MemOperand(GetHostReg64(RCPUPTR), offsetof(State, downcount)));
// while pending_ticks < downcount
m_emit->cmp(a64::w8, a64::w9);
m_emit->b(&frame_done_loop, a64::lt);
m_emit->b(&main_loop, a64::lt);
m_emit->Bind(&event_test);
EmitCall(reinterpret_cast<const void*>(&TimingEvents::RunEvents));
m_emit->b(&frame_done_loop);
m_emit->b(&main_loop);
// all done
m_emit->Bind(&exit_dispatcher);
RestoreStackAfterCall(stack_adjust);
m_register_cache.PopCalleeSavedRegisters(true);
m_emit->add(a64::sp, a64::sp, FUNCTION_STACK_SIZE);

View File

@ -3024,59 +3024,17 @@ CodeCache::DispatcherFunction CodeGenerator::CompileDispatcher()
EmitLoadGlobalAddress(Xbyak::Operand::RBP, &g_state);
Xbyak::Label frame_done_loop;
Xbyak::Label exit_dispatcher;
m_emit->L(frame_done_loop);
// if frame_done goto exit_dispatcher
m_emit->test(m_emit->byte[m_emit->rbp + offsetof(State, frame_done)], 1);
m_emit->jnz(exit_dispatcher, Xbyak::CodeGenerator::T_NEAR);
// eax <- sr
Xbyak::Label no_interrupt;
m_emit->mov(m_emit->eax, m_emit->dword[m_emit->rbp + offsetof(State, cop0_regs.sr.bits)]);
// if Iec == 0 then goto no_interrupt
m_emit->test(m_emit->eax, 1);
m_emit->jz(no_interrupt);
// sr & cause
m_emit->and_(m_emit->eax, m_emit->dword[m_emit->rbp + offsetof(State, cop0_regs.cause.bits)]);
// ((sr & cause) & 0xff00) == 0 goto no_interrupt
m_emit->test(m_emit->eax, 0xFF00);
m_emit->jz(no_interrupt);
// we have an interrupt
EmitCall(reinterpret_cast<const void*>(&DispatchInterrupt));
// no interrupt or we just serviced it
m_emit->L(no_interrupt);
// TimingEvents::UpdateCPUDowncount:
// eax <- head event->downcount
// downcount <- eax
EmitLoadGlobalAddress(Xbyak::Operand::RAX, TimingEvents::GetHeadEventPtr());
m_emit->mov(m_emit->rax, m_emit->qword[m_emit->rax]);
m_emit->mov(m_emit->eax, m_emit->dword[m_emit->rax + offsetof(TimingEvent, m_downcount)]);
m_emit->mov(m_emit->dword[m_emit->rbp + offsetof(State, downcount)], m_emit->eax);
Xbyak::Label event_test;
m_emit->jmp(event_test);
// main dispatch loop
Xbyak::Label main_loop;
m_emit->align(16);
m_emit->L(main_loop);
// eax <- pending_ticks
m_emit->mov(m_emit->eax, m_emit->dword[m_emit->rbp + offsetof(State, pending_ticks)]);
// while eax < downcount
Xbyak::Label downcount_hit;
m_emit->cmp(m_emit->eax, m_emit->dword[m_emit->rbp + offsetof(State, downcount)]);
m_emit->jge(downcount_hit);
// time to lookup the block
// eax <- pc
m_emit->mov(m_emit->eax, m_emit->dword[m_emit->rbp + offsetof(State, regs.pc)]);
m_emit->mov(m_emit->eax, m_emit->dword[m_emit->rbp + offsetof(State, pc)]);
// rcx <- s_fast_map[pc >> 16]
EmitLoadGlobalAddress(Xbyak::Operand::RBX, CodeCache::GetFastMapPointer());
@ -3087,22 +3045,19 @@ CodeCache::DispatcherFunction CodeGenerator::CompileDispatcher()
// call(rcx[pc * 2]) (fast_map[pc >> 2])
m_emit->call(m_emit->qword[m_emit->rcx + m_emit->rax * 2]);
// eax <- pending_ticks
m_emit->mov(m_emit->eax, m_emit->dword[m_emit->rbp + offsetof(State, pending_ticks)]);
// while eax < downcount
Xbyak::Label downcount_hit;
m_emit->cmp(m_emit->eax, m_emit->dword[m_emit->rbp + offsetof(State, downcount)]);
m_emit->jl(main_loop);
m_emit->L(event_test);
EmitCall(reinterpret_cast<const void*>(&TimingEvents::RunEvents));
m_emit->jmp(main_loop);
// end while
m_emit->L(downcount_hit);
// check events then for frame done
EmitLoadGlobalAddress(Xbyak::Operand::RAX, TimingEvents::GetHeadEventPtr());
m_emit->mov(m_emit->rax, m_emit->qword[m_emit->rax]);
m_emit->mov(m_emit->eax, m_emit->dword[m_emit->rax + offsetof(TimingEvent, m_downcount)]);
m_emit->cmp(m_emit->eax, m_emit->dword[m_emit->rbp + offsetof(State, pending_ticks)]);
m_emit->jg(frame_done_loop);
EmitCall(reinterpret_cast<const void*>(&TimingEvents::RunEvents));
m_emit->jmp(frame_done_loop);
// all done
m_emit->L(exit_dispatcher);
RestoreStackAfterCall(stack_adjust);
m_register_cache.PopCalleeSavedRegisters(true);
m_emit->ret();

View File

@ -130,6 +130,13 @@ constexpr u32 MAX_FAR_HOST_BYTES_PER_INSTRUCTION = 128;
// Alignment of code storage.
constexpr u32 CODE_STORAGE_ALIGNMENT = 4096;
#elif defined(CPU_RISCV64)
using HostReg = unsigned;
// Alignment of code storage.
constexpr u32 CODE_STORAGE_ALIGNMENT = 4096;
#else
using HostReg = int;

View File

@ -60,12 +60,8 @@ enum class Reg : u8
sp,
fp,
ra,
// not accessible to instructions
hi,
lo,
pc,
npc,
count
};
@ -213,6 +209,7 @@ union Instruction
}
ALWAYS_INLINE Cop0Instruction Cop0Op() const { return static_cast<Cop0Instruction>(bits & UINT32_C(0x3F)); }
ALWAYS_INLINE u32 Cop2Index() const { return ((bits >> 11) & 0x1F) | ((bits >> 17) & 0x20); }
} cop;
bool IsCop2Instruction() const
@ -240,7 +237,7 @@ struct Registers
{
union
{
u32 r[static_cast<u8>(Reg::count)];
u32 r[static_cast<u8>(Reg::count) + 1]; // +1 for the dummy load delay write slot
struct
{
@ -276,12 +273,8 @@ struct Registers
u32 sp; // r29
u32 fp; // r30
u32 ra; // r31
// not accessible to instructions
u32 hi;
u32 lo;
u32 pc; // at execution time: the address of the next instruction to execute (already fetched)
u32 npc; // at execution time: the address of the next instruction to fetch
};
};
};

View File

@ -107,7 +107,7 @@ static const std::array<u32*, 38> REGISTERS {
&CPU::g_state.regs.hi,
&CPU::g_state.cop0_regs.BadVaddr,
&CPU::g_state.cop0_regs.cause.bits,
&CPU::g_state.regs.pc,
&CPU::g_state.pc,
};
/// Number of registers in GDB remote protocol for MIPS III.

View File

@ -901,9 +901,10 @@ void GPU::CRTCTickEvent(TickCount ticks)
InterruptController::InterruptRequest(InterruptController::IRQ::VBLANK);
// flush any pending draws and "scan out" the image
// TODO: move present in here I guess
FlushRender();
UpdateDisplay();
System::FrameDone();
TimingEvents::SetFrameDone();
// switch fields early. this is needed so we draw to the correct one.
if (m_GPUSTAT.InInterleaved480iMode())

View File

@ -4,13 +4,13 @@
#include "gte.h"
#include "common/assert.h"
#include "common/bitutils.h"
#include "util/state_wrapper.h"
#include "cpu_core.h"
#include "cpu_core_private.h"
#include "host_display.h"
#include "pgxp.h"
#include "settings.h"
#include "timing_event.h"
#include "util/state_wrapper.h"
#include <algorithm>
#include <array>
#include <numeric>
@ -471,11 +471,12 @@ ALWAYS_INLINE static u32 UNRDivide(u32 lhs, u32 rhs)
return std::min<u32>(0x1FFFF, result);
}
static void MulMatVec(const s16 M[3][3], const s16 Vx, const s16 Vy, const s16 Vz, u8 shift, bool lm)
static void MulMatVec(const s16* M_, const s16 Vx, const s16 Vy, const s16 Vz, u8 shift, bool lm)
{
#define M(i, j) M_[((i)*3) + (j)]
#define dot3(i) \
TruncateAndSetMACAndIR<i + 1>(SignExtendMACResult<i + 1>((s64(M[i][0]) * s64(Vx)) + (s64(M[i][1]) * s64(Vy))) + \
(s64(M[i][2]) * s64(Vz)), \
TruncateAndSetMACAndIR<i + 1>(SignExtendMACResult<i + 1>((s64(M(i, 0)) * s64(Vx)) + (s64(M(i, 1)) * s64(Vy))) + \
(s64(M(i, 2)) * s64(Vz)), \
shift, lm)
dot3(0);
@ -483,15 +484,17 @@ static void MulMatVec(const s16 M[3][3], const s16 Vx, const s16 Vy, const s16 V
dot3(2);
#undef dot3
#undef M
}
static void MulMatVec(const s16 M[3][3], const s32 T[3], const s16 Vx, const s16 Vy, const s16 Vz, u8 shift, bool lm)
static void MulMatVec(const s16* M_, const s32 T[3], const s16 Vx, const s16 Vy, const s16 Vz, u8 shift, bool lm)
{
#define M(i, j) M_[((i)*3) + (j)]
#define dot3(i) \
TruncateAndSetMACAndIR<i + 1>( \
SignExtendMACResult<i + 1>(SignExtendMACResult<i + 1>((s64(T[i]) << 12) + (s64(M[i][0]) * s64(Vx))) + \
(s64(M[i][1]) * s64(Vy))) + \
(s64(M[i][2]) * s64(Vz)), \
SignExtendMACResult<i + 1>(SignExtendMACResult<i + 1>((s64(T[i]) << 12) + (s64(M(i, 0)) * s64(Vx))) + \
(s64(M(i, 1)) * s64(Vy))) + \
(s64(M(i, 2)) * s64(Vz)), \
shift, lm)
dot3(0);
@ -499,19 +502,20 @@ static void MulMatVec(const s16 M[3][3], const s32 T[3], const s16 Vx, const s16
dot3(2);
#undef dot3
#undef M
}
static void MulMatVecBuggy(const s16 M[3][3], const s32 T[3], const s16 Vx, const s16 Vy, const s16 Vz, u8 shift,
bool lm)
static void MulMatVecBuggy(const s16* M_, const s32 T[3], const s16 Vx, const s16 Vy, const s16 Vz, u8 shift, bool lm)
{
#define M(i, j) M_[((i)*3) + (j)]
#define dot3(i) \
do \
{ \
TruncateAndSetIR<i + 1>(static_cast<s32>(SignExtendMACResult<i + 1>(SignExtendMACResult<i + 1>( \
(s64(T[i]) << 12) + (s64(M[i][0]) * s64(Vx)))) >> \
(s64(T[i]) << 12) + (s64(M(i, 0)) * s64(Vx)))) >> \
shift), \
false); \
TruncateAndSetMACAndIR<i + 1>(SignExtendMACResult<i + 1>((s64(M[i][1]) * s64(Vy))) + (s64(M[i][2]) * s64(Vz)), \
TruncateAndSetMACAndIR<i + 1>(SignExtendMACResult<i + 1>((s64(M(i, 1)) * s64(Vy))) + (s64(M(i, 2)) * s64(Vz)), \
shift, lm); \
} while (0)
@ -520,82 +524,50 @@ static void MulMatVecBuggy(const s16 M[3][3], const s32 T[3], const s16 Vx, cons
dot3(2);
#undef dot3
#undef M
}
static void Execute_MVMVA(Instruction inst)
{
REGS.FLAG.Clear();
// TODO: Remove memcpy..
s16 M[3][3];
switch (inst.mvmva_multiply_matrix)
static constexpr const s16* M_lookup[4] = {&REGS.RT[0][0], &REGS.LLM[0][0], &REGS.LCM[0][0], nullptr};
static constexpr const s16* V_lookup[4][3] = {
{&REGS.V0[0], &REGS.V0[1], &REGS.V0[2]},
{&REGS.V1[0], &REGS.V1[1], &REGS.V1[2]},
{&REGS.V2[0], &REGS.V2[1], &REGS.V2[2]},
{&REGS.IR1, &REGS.IR2, &REGS.IR3},
};
static constexpr const s32 zero_T[3] = {};
static constexpr const s32* T_lookup[4] = {REGS.TR, REGS.BK, REGS.FC, zero_T};
const s16* M = M_lookup[inst.mvmva_multiply_matrix];
const s16* const* const V = V_lookup[inst.mvmva_multiply_vector];
const s32* const T = T_lookup[inst.mvmva_translation_vector];
s16 buggy_M[3][3];
if (!M)
{
case 0:
std::memcpy(M, REGS.RT, sizeof(s16) * 3 * 3);
break;
case 1:
std::memcpy(M, REGS.LLM, sizeof(s16) * 3 * 3);
break;
case 2:
std::memcpy(M, REGS.LCM, sizeof(s16) * 3 * 3);
break;
default:
{
// buggy
M[0][0] = -static_cast<s16>(ZeroExtend16(REGS.RGBC[0]) << 4);
M[0][1] = static_cast<s16>(ZeroExtend16(REGS.RGBC[0]) << 4);
M[0][2] = REGS.IR0;
M[1][0] = REGS.RT[0][2];
M[1][1] = REGS.RT[0][2];
M[1][2] = REGS.RT[0][2];
M[2][0] = REGS.RT[1][1];
M[2][1] = REGS.RT[1][1];
M[2][2] = REGS.RT[1][1];
}
break;
// buggy
buggy_M[0][0] = -static_cast<s16>(ZeroExtend16(REGS.RGBC[0]) << 4);
buggy_M[0][1] = static_cast<s16>(ZeroExtend16(REGS.RGBC[0]) << 4);
buggy_M[0][2] = REGS.IR0;
buggy_M[1][0] = REGS.RT[0][2];
buggy_M[1][1] = REGS.RT[0][2];
buggy_M[1][2] = REGS.RT[0][2];
buggy_M[2][0] = REGS.RT[1][1];
buggy_M[2][1] = REGS.RT[1][1];
buggy_M[2][2] = REGS.RT[1][1];
M = &buggy_M[0][0];
}
s16 Vx, Vy, Vz;
switch (inst.mvmva_multiply_vector)
{
case 0:
Vx = REGS.V0[0];
Vy = REGS.V0[1];
Vz = REGS.V0[2];
break;
case 1:
Vx = REGS.V1[0];
Vy = REGS.V1[1];
Vz = REGS.V1[2];
break;
case 2:
Vx = REGS.V2[0];
Vy = REGS.V2[1];
Vz = REGS.V2[2];
break;
default:
Vx = REGS.IR1;
Vy = REGS.IR2;
Vz = REGS.IR3;
break;
}
static const s32 zero_T[3] = {};
switch (inst.mvmva_translation_vector)
{
case 0:
MulMatVec(M, REGS.TR, Vx, Vy, Vz, inst.GetShift(), inst.lm);
break;
case 1:
MulMatVec(M, REGS.BK, Vx, Vy, Vz, inst.GetShift(), inst.lm);
break;
case 2:
MulMatVecBuggy(M, REGS.FC, Vx, Vy, Vz, inst.GetShift(), inst.lm);
break;
default:
MulMatVec(M, zero_T, Vx, Vy, Vz, inst.GetShift(), inst.lm);
break;
}
const s16 Vx = *V[0];
const s16 Vy = *V[1];
const s16 Vz = *V[2];
if (inst.mvmva_translation_vector != 2)
MulMatVec(M, T, Vx, Vy, Vz, inst.GetShift(), inst.lm);
else
MulMatVecBuggy(M, T, Vx, Vy, Vz, inst.GetShift(), inst.lm);
REGS.FLAG.UpdateError();
}
@ -874,10 +846,10 @@ static ALWAYS_INLINE void InterpolateColor(s64 in_MAC1, s64 in_MAC2, s64 in_MAC3
static void NCS(const s16 V[3], u8 shift, bool lm)
{
// [IR1,IR2,IR3] = [MAC1,MAC2,MAC3] = (LLM*V0) SAR (sf*12)
MulMatVec(REGS.LLM, V[0], V[1], V[2], shift, lm);
MulMatVec(&REGS.LLM[0][0], V[0], V[1], V[2], shift, lm);
// [IR1,IR2,IR3] = [MAC1,MAC2,MAC3] = (BK*1000h + LCM*IR) SAR (sf*12)
MulMatVec(REGS.LCM, REGS.BK, REGS.IR1, REGS.IR2, REGS.IR3, shift, lm);
MulMatVec(&REGS.LCM[0][0], REGS.BK, REGS.IR1, REGS.IR2, REGS.IR3, shift, lm);
// Color FIFO = [MAC1/16,MAC2/16,MAC3/16,CODE], [IR1,IR2,IR3] = [MAC1,MAC2,MAC3]
PushRGBFromMAC();
@ -909,10 +881,10 @@ static void Execute_NCT(Instruction inst)
static void NCCS(const s16 V[3], u8 shift, bool lm)
{
// [IR1,IR2,IR3] = [MAC1,MAC2,MAC3] = (LLM*V0) SAR (sf*12)
MulMatVec(REGS.LLM, V[0], V[1], V[2], shift, lm);
MulMatVec(&REGS.LLM[0][0], V[0], V[1], V[2], shift, lm);
// [IR1,IR2,IR3] = [MAC1,MAC2,MAC3] = (BK*1000h + LCM*IR) SAR (sf*12)
MulMatVec(REGS.LCM, REGS.BK, REGS.IR1, REGS.IR2, REGS.IR3, shift, lm);
MulMatVec(&REGS.LCM[0][0], REGS.BK, REGS.IR1, REGS.IR2, REGS.IR3, shift, lm);
// [MAC1,MAC2,MAC3] = [R*IR1,G*IR2,B*IR3] SHL 4 ;<--- for NCDx/NCCx
// [MAC1,MAC2,MAC3] = [MAC1,MAC2,MAC3] SAR (sf*12) ;<--- for NCDx/NCCx
@ -950,10 +922,10 @@ static void Execute_NCCT(Instruction inst)
static void NCDS(const s16 V[3], u8 shift, bool lm)
{
// [IR1,IR2,IR3] = [MAC1,MAC2,MAC3] = (LLM*V0) SAR (sf*12)
MulMatVec(REGS.LLM, V[0], V[1], V[2], shift, lm);
MulMatVec(&REGS.LLM[0][0], V[0], V[1], V[2], shift, lm);
// [IR1,IR2,IR3] = [MAC1,MAC2,MAC3] = (BK*1000h + LCM*IR) SAR (sf*12)
MulMatVec(REGS.LCM, REGS.BK, REGS.IR1, REGS.IR2, REGS.IR3, shift, lm);
MulMatVec(&REGS.LCM[0][0], REGS.BK, REGS.IR1, REGS.IR2, REGS.IR3, shift, lm);
// No need to assign these to MAC[1-3], as it'll never overflow.
// [MAC1,MAC2,MAC3] = [R*IR1,G*IR2,B*IR3] SHL 4 ;<--- for NCDx/NCCx
@ -999,7 +971,7 @@ static void Execute_CC(Instruction inst)
const bool lm = inst.lm;
// [IR1,IR2,IR3] = [MAC1,MAC2,MAC3] = (BK*1000h + LCM*IR) SAR (sf*12)
MulMatVec(REGS.LCM, REGS.BK, REGS.IR1, REGS.IR2, REGS.IR3, shift, lm);
MulMatVec(&REGS.LCM[0][0], REGS.BK, REGS.IR1, REGS.IR2, REGS.IR3, shift, lm);
// [MAC1,MAC2,MAC3] = [R*IR1,G*IR2,B*IR3] SHL 4
// [MAC1,MAC2,MAC3] = [MAC1,MAC2,MAC3] SAR (sf*12)
@ -1021,7 +993,7 @@ static void Execute_CDP(Instruction inst)
const bool lm = inst.lm;
// [IR1,IR2,IR3] = [MAC1,MAC2,MAC3] = (BK*1000h + LCM*IR) SAR (sf*12)
MulMatVec(REGS.LCM, REGS.BK, REGS.IR1, REGS.IR2, REGS.IR3, shift, lm);
MulMatVec(&REGS.LCM[0][0], REGS.BK, REGS.IR1, REGS.IR2, REGS.IR3, shift, lm);
// No need to assign these to MAC[1-3], as it'll never overflow.
// [MAC1,MAC2,MAC3] = [R*IR1,G*IR2,B*IR3] SHL 4

View File

@ -108,8 +108,7 @@ static PGXP_value CP0_reg[32];
#define CPU_Lo CPU_reg[33]
// GTE registers
static PGXP_value GTE_data_reg[32];
static PGXP_value GTE_ctrl_reg[32];
static PGXP_value GTE_regs[64];
static PGXP_value* Mem = nullptr;
static PGXP_value* vertexCache = nullptr;
@ -274,8 +273,7 @@ void Initialize()
std::memset(CPU_reg, 0, sizeof(CPU_reg));
std::memset(CP0_reg, 0, sizeof(CP0_reg));
std::memset(GTE_data_reg, 0, sizeof(GTE_data_reg));
std::memset(GTE_ctrl_reg, 0, sizeof(GTE_ctrl_reg));
std::memset(GTE_regs, 0, sizeof(GTE_regs));
if (!Mem)
{
@ -306,8 +304,7 @@ void Reset()
std::memset(CPU_reg, 0, sizeof(CPU_reg));
std::memset(CP0_reg, 0, sizeof(CP0_reg));
std::memset(GTE_data_reg, 0, sizeof(GTE_data_reg));
std::memset(GTE_ctrl_reg, 0, sizeof(GTE_ctrl_reg));
std::memset(GTE_regs, 0, sizeof(GTE_regs));
if (Mem)
std::memset(Mem, 0, sizeof(PGXP_value) * PGXP_MEM_SIZE);
@ -329,8 +326,7 @@ void Shutdown()
Mem = nullptr;
}
std::memset(GTE_data_reg, 0, sizeof(GTE_data_reg));
std::memset(GTE_ctrl_reg, 0, sizeof(GTE_ctrl_reg));
std::memset(GTE_regs, 0, sizeof(GTE_regs));
std::memset(CPU_reg, 0, sizeof(CPU_reg));
std::memset(CP0_reg, 0, sizeof(CP0_reg));
@ -344,18 +340,19 @@ void Shutdown()
#define rt(_instr) ((_instr >> 16) & 0x1F) // The rt part of the instruction register
#define rs(_instr) ((_instr >> 21) & 0x1F) // The rs part of the instruction register
#define imm(_instr) (_instr & 0xFFFF) // The immediate part of the instruction register
#define cop2idx(_instr) (((_instr >> 11) & 0x1F) | ((_instr >> 17) & 0x20))
#define SX0 (GTE_data_reg[12].x)
#define SY0 (GTE_data_reg[12].y)
#define SX1 (GTE_data_reg[13].x)
#define SY1 (GTE_data_reg[13].y)
#define SX2 (GTE_data_reg[14].x)
#define SY2 (GTE_data_reg[14].y)
#define SX0 (GTE_regs[12].x)
#define SY0 (GTE_regs[12].y)
#define SX1 (GTE_regs[13].x)
#define SY1 (GTE_regs[13].y)
#define SX2 (GTE_regs[14].x)
#define SY2 (GTE_regs[14].y)
#define SXY0 (GTE_data_reg[12])
#define SXY1 (GTE_data_reg[13])
#define SXY2 (GTE_data_reg[14])
#define SXYP (GTE_data_reg[15])
#define SXY0 (GTE_regs[12])
#define SXY1 (GTE_regs[13])
#define SXY2 (GTE_regs[14])
#define SXYP (GTE_regs[15])
void GTE_PushSXYZ2f(float x, float y, float z, u32 v)
{
@ -428,49 +425,35 @@ static void PGXP_MTC2_int(PGXP_value value, u32 reg)
return;
}
GTE_data_reg[reg] = value;
GTE_regs[reg] = value;
}
////////////////////////////////////
// Data transfer tracking
////////////////////////////////////
void CPU_MFC2(u32 instr, u32 rtVal, u32 rdVal)
void CPU_MFC2(u32 instr, u32 rdVal)
{
// CPU[Rt] = GTE_D[Rd]
Validate(&GTE_data_reg[rd(instr)], rdVal);
CPU_reg[rt(instr)] = GTE_data_reg[rd(instr)];
CPU_reg[rt(instr)].value = rtVal;
const u32 idx = cop2idx(instr);
Validate(&GTE_regs[idx], rdVal);
CPU_reg[rt(instr)] = GTE_regs[idx];
CPU_reg[rt(instr)].value = rdVal;
}
void CPU_MTC2(u32 instr, u32 rdVal, u32 rtVal)
void CPU_MTC2(u32 instr, u32 rtVal)
{
// GTE_D[Rd] = CPU[Rt]
const u32 idx = cop2idx(instr);
Validate(&CPU_reg[rt(instr)], rtVal);
PGXP_MTC2_int(CPU_reg[rt(instr)], rd(instr));
GTE_data_reg[rd(instr)].value = rdVal;
}
void CPU_CFC2(u32 instr, u32 rtVal, u32 rdVal)
{
// CPU[Rt] = GTE_C[Rd]
Validate(&GTE_ctrl_reg[rd(instr)], rdVal);
CPU_reg[rt(instr)] = GTE_ctrl_reg[rd(instr)];
CPU_reg[rt(instr)].value = rtVal;
}
void CPU_CTC2(u32 instr, u32 rdVal, u32 rtVal)
{
// GTE_C[Rd] = CPU[Rt]
Validate(&CPU_reg[rt(instr)], rtVal);
GTE_ctrl_reg[rd(instr)] = CPU_reg[rt(instr)];
GTE_ctrl_reg[rd(instr)].value = rdVal;
PGXP_MTC2_int(CPU_reg[rt(instr)], idx);
GTE_regs[idx].value = rtVal;
}
////////////////////////////////////
// Memory Access
////////////////////////////////////
void CPU_LWC2(u32 instr, u32 rtVal, u32 addr)
void CPU_LWC2(u32 instr, u32 addr, u32 rtVal)
{
// GTE_D[Rt] = Mem[addr]
PGXP_value val;
@ -478,11 +461,11 @@ void CPU_LWC2(u32 instr, u32 rtVal, u32 addr)
PGXP_MTC2_int(val, rt(instr));
}
void CPU_SWC2(u32 instr, u32 rtVal, u32 addr)
void CPU_SWC2(u32 instr, u32 addr, u32 rtVal)
{
// Mem[addr] = GTE_D[Rt]
Validate(&GTE_data_reg[rt(instr)], rtVal);
WriteMem(&GTE_data_reg[rt(instr)], addr);
Validate(&GTE_regs[rt(instr)], rtVal);
WriteMem(&GTE_regs[rt(instr)], addr);
}
ALWAYS_INLINE_RELEASE void PGXP_CacheVertex(s16 sx, s16 sy, const PGXP_value& vertex)
@ -575,29 +558,29 @@ bool GetPreciseVertex(u32 addr, u32 value, int x, int y, int xOffs, int yOffs, f
#define imm_sext(_instr) \
static_cast<s32>(static_cast<s16>(_instr & 0xFFFF)) // The immediate part of the instruction register
void CPU_LW(u32 instr, u32 rtVal, u32 addr)
void CPU_LW(u32 instr, u32 addr, u32 rtVal)
{
// Rt = Mem[Rs + Im]
ValidateAndCopyMem(&CPU_reg[rt(instr)], addr, rtVal);
}
void CPU_LBx(u32 instr, u32 rtVal, u32 addr)
void CPU_LBx(u32 instr, u32 addr, u32 rtVal)
{
CPU_reg[rt(instr)] = PGXP_value_invalid;
}
void CPU_LHx(u32 instr, u32 rtVal, u32 addr)
void CPU_LHx(u32 instr, u32 addr, u32 rtVal)
{
// Rt = Mem[Rs + Im] (sign/zero extended)
ValidateAndCopyMem16(&CPU_reg[rt(instr)], addr, rtVal, 1);
}
void CPU_SB(u32 instr, u8 rtVal, u32 addr)
void CPU_SB(u32 instr, u32 addr, u32 rtVal)
{
WriteMem(&PGXP_value_invalid, addr);
}
void CPU_SH(u32 instr, u16 rtVal, u32 addr)
void CPU_SH(u32 instr, u32 addr, u32 rtVal)
{
PGXP_value* val = &CPU_reg[rt(instr)];
@ -606,7 +589,7 @@ void CPU_SH(u32 instr, u16 rtVal, u32 addr)
WriteMem16(val, addr);
}
void CPU_SW(u32 instr, u32 rtVal, u32 addr)
void CPU_SW(u32 instr, u32 addr, u32 rtVal)
{
// Mem[Rs + Im] = Rt
PGXP_value* val = &CPU_reg[rt(instr)];
@ -1587,10 +1570,10 @@ void CPU_MFHI(u32 instr, u32 hiVal)
CPU_reg[rd(instr)] = CPU_Hi;
}
// Tracks MTHI: copies the PGXP state of the source GPR into Hi.
//   instr - raw instruction word; the source register is taken from its rs field.
//   rsVal - the integer value the CPU currently holds in that register, used to
//           validate the tracked PGXP value before it is copied.
void CPU_MTHI(u32 instr, u32 rsVal)
{
  // Hi = Rs
  // The register that is validated must be the same one that is copied. Reading
  // CPU_reg[rd(instr)] here would copy the state of an unrelated register, since
  // the value passed in (rsVal) belongs to the rs field.
  Validate(&CPU_reg[rs(instr)], rsVal);
  CPU_Hi = CPU_reg[rs(instr)];
}
@ -1603,10 +1586,10 @@ void CPU_MFLO(u32 instr, u32 loVal)
CPU_reg[rd(instr)] = CPU_Lo;
}
// Tracks MTLO: copies the PGXP state of the source GPR into Lo.
//   instr - raw instruction word; the source register is taken from its rs field.
//   rsVal - the integer value the CPU currently holds in that register, used to
//           validate the tracked PGXP value before it is copied.
void CPU_MTLO(u32 instr, u32 rsVal)
{
  // Lo = Rs
  // The register that is validated must be the same one that is copied. Reading
  // CPU_reg[rd(instr)] here would copy the state of an unrelated register, since
  // the value passed in (rsVal) belongs to the rs field.
  Validate(&CPU_reg[rs(instr)], rsVal);
  CPU_Lo = CPU_reg[rs(instr)];
}

View File

@ -34,24 +34,22 @@ int GTE_NCLIP_valid(u32 sxy0, u32 sxy1, u32 sxy2);
float GTE_NCLIP();
// Data transfer tracking
void CPU_MFC2(u32 instr, u32 rtVal, u32 rdVal); // copy GTE data reg to GPR reg (MFC2)
void CPU_MTC2(u32 instr, u32 rdVal, u32 rtVal); // copy GPR reg to GTE data reg (MTC2)
void CPU_CFC2(u32 instr, u32 rtVal, u32 rdVal); // copy GTE ctrl reg to GPR reg (CFC2)
void CPU_CTC2(u32 instr, u32 rdVal, u32 rtVal); // copy GPR reg to GTE ctrl reg (CTC2)
void CPU_MFC2(u32 instr, u32 rdVal); // copy GTE data reg to GPR reg (MFC2)
void CPU_MTC2(u32 instr, u32 rtVal); // copy GPR reg to GTE data reg (MTC2)
// Memory Access
void CPU_LWC2(u32 instr, u32 rtVal, u32 addr); // copy memory to GTE reg
void CPU_SWC2(u32 instr, u32 rtVal, u32 addr); // copy GTE reg to memory
void CPU_LWC2(u32 instr, u32 addr, u32 rtVal); // copy memory to GTE reg
void CPU_SWC2(u32 instr, u32 addr, u32 rtVal); // copy GTE reg to memory
bool GetPreciseVertex(u32 addr, u32 value, int x, int y, int xOffs, int yOffs, float* out_x, float* out_y,
float* out_w);
// -- CPU functions
void CPU_LW(u32 instr, u32 rtVal, u32 addr);
void CPU_LHx(u32 instr, u32 rtVal, u32 addr);
void CPU_LBx(u32 instr, u32 rtVal, u32 addr);
void CPU_SB(u32 instr, u8 rtVal, u32 addr);
void CPU_SH(u32 instr, u16 rtVal, u32 addr);
void CPU_SW(u32 instr, u32 rtVal, u32 addr);
void CPU_LW(u32 instr, u32 addr, u32 rtVal);
void CPU_LHx(u32 instr, u32 addr, u32 rtVal);
void CPU_LBx(u32 instr, u32 addr, u32 rtVal);
void CPU_SB(u32 instr, u32 addr, u32 rtVal);
void CPU_SH(u32 instr, u32 addr, u32 rtVal);
void CPU_SW(u32 instr, u32 addr, u32 rtVal);
void CPU_MOVE(u32 rd_and_rs, u32 rsVal);
// Arithmetic with immediate value
@ -93,9 +91,9 @@ void CPU_SRAV(u32 instr, u32 rtVal, u32 rsVal);
// Move registers
void CPU_MFHI(u32 instr, u32 hiVal);
void CPU_MTHI(u32 instr, u32 rdVal);
void CPU_MTHI(u32 instr, u32 rsVal);
void CPU_MFLO(u32 instr, u32 loVal);
void CPU_MTLO(u32 instr, u32 rdVal);
void CPU_MTLO(u32 instr, u32 rsVal);
// CP0 Data transfer tracking
void CPU_MFC0(u32 instr, u32 rdVal);

View File

@ -5,7 +5,7 @@
#include "types.h"
static constexpr u32 SAVE_STATE_MAGIC = 0x43435544;
static constexpr u32 SAVE_STATE_VERSION = 58;
static constexpr u32 SAVE_STATE_VERSION = 59;
static constexpr u32 SAVE_STATE_MINIMUM_VERSION = 42;
static_assert(SAVE_STATE_VERSION >= SAVE_STATE_MINIMUM_VERSION);

View File

@ -103,20 +103,21 @@ static void DestroySystem();
static std::string GetMediaPathFromSaveState(const char* path);
static bool DoLoadState(ByteStream* stream, bool force_software_renderer, bool update_display);
static bool DoState(StateWrapper& sw, GPUTexture** host_texture, bool update_display, bool is_memory_state);
static void DoRunFrame();
static bool CreateGPU(GPURenderer renderer);
static bool SaveUndoLoadState();
/// Throttles the system, i.e. sleeps until it's time to execute the next frame.
static void Throttle();
static void SetRewinding(bool enabled);
static bool SaveRewindState();
static void DoRewind();
static void SaveRunaheadState();
static void DoRunahead();
static void DoMemorySaveStates();
static bool DoRunahead();
static bool Initialize(bool force_software_renderer);
static bool FastForwardToFirstFrame();
static bool UpdateGameSettingsLayer();
static void UpdateRunningGame(const char* path, CDImage* image, bool booting);
@ -149,12 +150,16 @@ static std::string s_running_game_serial;
static std::string s_running_game_title;
static System::GameHash s_running_game_hash;
static bool s_running_unknown_game;
static bool s_was_fast_booted;
static float s_throttle_frequency = 60.0f;
static float s_target_speed = 1.0f;
static Common::Timer::Value s_frame_period = 0;
static Common::Timer::Value s_next_frame_time = 0;
static bool s_last_frame_skipped = false;
static bool s_system_executing = false;
static bool s_system_interrupted = false;
static bool s_frame_step_request = false;
static bool s_fast_forward_enabled = false;
static bool s_turbo_enabled = false;
@ -208,6 +213,7 @@ static bool s_rewinding_first_save = false;
static std::deque<MemorySaveState> s_runahead_states;
static bool s_runahead_replay_pending = false;
static u32 s_runahead_frames = 0;
static u32 s_runahead_replay_frames = 0;
static TinyString GetTimestampStringForFileName()
{
@ -227,9 +233,6 @@ void System::SetState(State new_state)
Assert(s_state == State::Paused || s_state == State::Running);
Assert(new_state == State::Paused || new_state == State::Running);
s_state = new_state;
if (new_state == State::Paused)
CPU::ForceDispatcherExit();
}
bool System::IsRunning()
@ -237,6 +240,11 @@ bool System::IsRunning()
return s_state == State::Running;
}
/// Returns true if the system state is anything other than Running, or if
/// s_system_interrupted has been set.
bool System::IsExecutionInterrupted()
{
  if (s_system_interrupted)
    return true;

  return (s_state != State::Running);
}
bool System::IsPaused()
{
return s_state == State::Paused;
@ -304,18 +312,6 @@ u32 System::GetInternalFrameNumber()
return s_internal_frame_number;
}
void System::FrameDone()
{
s_frame_number++;
CPU::g_state.frame_done = true;
CPU::g_state.downcount = 0;
}
void System::IncrementInternalFrameNumber()
{
s_internal_frame_number++;
}
const std::string& System::GetDiscPath()
{
return s_running_game_path;
@ -340,6 +336,11 @@ bool System::IsRunningUnknownGame()
return s_running_unknown_game;
}
/// Returns the s_was_fast_booted flag. BootSystem() sets it after applying the fast boot
/// BIOS patch, and DestroySystem() clears it.
bool System::WasFastBooted()
{
return s_was_fast_booted;
}
const BIOS::ImageInfo* System::GetBIOSImageInfo()
{
return s_bios_image_info;
@ -529,7 +530,7 @@ bool System::GetGameDetailsFromImage(CDImage* cdi, std::string* out_id, GameHash
pos++;
}
}
if (out_id)
{
if (id.empty())
@ -644,7 +645,7 @@ std::string System::GetExecutableNameForImage(CDImage* cdi, bool strip_subdirect
}
bool System::ReadExecutableFromImage(CDImage* cdi, std::string* out_executable_name,
std::vector<u8>* out_executable_data)
std::vector<u8>* out_executable_data)
{
ISOReader iso;
if (!iso.Open(cdi, 1))
@ -653,7 +654,8 @@ bool System::ReadExecutableFromImage(CDImage* cdi, std::string* out_executable_n
return ReadExecutableFromImage(iso, out_executable_name, out_executable_data);
}
bool System::ReadExecutableFromImage(ISOReader& iso, std::string* out_executable_name, std::vector<u8>* out_executable_data)
bool System::ReadExecutableFromImage(ISOReader& iso, std::string* out_executable_name,
std::vector<u8>* out_executable_data)
{
const std::string executable_path = GetExecutableNameForImage(iso, false);
Log_DevPrintf("Executable path: '%s'", executable_path.c_str());
@ -886,7 +888,11 @@ void System::ApplySettings(bool display_osd_messages)
Host::CheckForSettingsChanges(old_config);
if (IsValid())
{
ResetPerformanceCounters();
if (s_system_executing)
s_system_interrupted = true;
}
}
bool System::ReloadGameSettings(bool display_osd_messages)
@ -1304,9 +1310,15 @@ bool System::BootSystem(SystemBootParameters parameters)
g_settings.bios_patch_fast_boot))
{
if (s_bios_image_info && s_bios_image_info->patch_compatible)
{
// TODO: Fast boot without patches...
BIOS::PatchBIOSFastBoot(Bus::g_bios, Bus::BIOS_SIZE);
s_was_fast_booted = true;
}
else
{
Log_ErrorPrintf("Not patching fast boot, as BIOS is not patch compatible.");
}
}
// Good to go.
@ -1346,6 +1358,9 @@ bool System::BootSystem(SystemBootParameters parameters)
if (parameters.load_image_to_ram || g_settings.cdrom_load_image_to_ram)
CDROM::PrecacheMedia();
if (parameters.fast_forward_to_first_frame)
FastForwardToFirstFrame();
if (g_settings.audio_dump_on_boot)
StartDumpingAudio();
@ -1370,6 +1385,10 @@ bool System::Initialize(bool force_software_renderer)
s_turbo_enabled = false;
s_fast_forward_enabled = false;
s_rewind_load_frequency = -1;
s_rewind_load_counter = -1;
s_rewinding_first_save = true;
s_average_frame_time_accumulator = 0.0f;
s_minimum_frame_time_accumulator = 0.0f;
s_maximum_frame_time_accumulator = 0.0f;
@ -1488,6 +1507,7 @@ bool System::Initialize(bool force_software_renderer)
void System::DestroySystem()
{
DebugAssert(!s_system_executing);
if (s_state == State::Shutdown)
return;
@ -1528,6 +1548,10 @@ void System::DestroySystem()
s_bios_hash = {};
s_bios_image_info = nullptr;
s_was_fast_booted = false;
s_cheat_list.reset();
s_state = State::Shutdown;
Host::OnSystemDestroyed();
}
@ -1539,8 +1563,6 @@ void System::ClearRunningGame()
s_running_game_title.clear();
s_running_game_hash = 0;
s_running_unknown_game = false;
s_cheat_list.reset();
s_state = State::Shutdown;
Host::OnGameChanged(s_running_game_path, s_running_game_serial, s_running_game_title);
@ -1549,25 +1571,124 @@ void System::ClearRunningGame()
#endif
}
/// Intended to advance emulation, with audio output muted, until the internal frame number
/// changes, giving up once more than MAX_FRAMES_TO_SKIP frames have elapsed.
/// Returns true if the internal frame number differs from its value on entry.
/// NOTE(review): the loop body is currently a Panic("Fixme") placeholder and the RunFrame() call is
/// commented out, so nothing inside the loop advances either frame counter. The loop condition is
/// true on entry, so the Panic is reached on the first iteration.
bool System::FastForwardToFirstFrame()
{
// If we're taking more than 60 seconds to load the game, oof..
// NOTE(review): 30 * 60 is 1800 frames; whether that is 60 seconds depends on the frame rate - confirm the limit.
static constexpr u32 MAX_FRAMES_TO_SKIP = 30 * 60;
// Snapshot both counters so progress can be measured relative to the point of entry.
const u32 current_frame_number = s_frame_number;
const u32 current_internal_frame_number = s_internal_frame_number;
SPU::SetAudioOutputMuted(true);
// Keep going while the internal frame number is unchanged and the frame limit has not been exceeded.
while (s_internal_frame_number == current_internal_frame_number &&
(s_frame_number - current_frame_number) <= MAX_FRAMES_TO_SKIP)
{
Panic("Fixme");
// System::RunFrame();
}
SPU::SetAudioOutputMuted(false);
return (s_internal_frame_number != current_internal_frame_number);
}
void System::Execute()
{
while (System::IsRunning())
for (;;)
{
if (s_display_all_frames)
System::RunFrame();
else
System::RunFrames();
// this can shut us down
Host::PumpMessagesOnCPUThread();
if (!IsValid())
return;
if (s_frame_step_request)
switch (s_state)
{
s_frame_step_request = false;
PauseSystem(true);
case State::Running:
{
s_system_executing = true;
// TODO: Purge reset/restore
g_gpu->RestoreGraphicsAPIState();
if (s_rewind_load_counter >= 0)
DoRewind();
else
CPU::Execute();
g_gpu->ResetGraphicsAPIState();
s_system_executing = false;
continue;
}
case State::Stopping:
{
DestroySystem();
return;
}
case State::Paused:
default:
return;
}
}
}
void System::FrameDone()
{
s_frame_number++;
// Generate any pending samples from the SPU before sleeping, this way we reduce the chances of underruns.
SPU::GeneratePendingSamples();
if (s_cheat_list)
s_cheat_list->Apply();
if (s_frame_step_request)
{
s_frame_step_request = false;
PauseSystem(true);
}
// Save states for rewind and runahead.
if (s_rewind_save_counter >= 0)
{
if (s_rewind_save_counter == 0)
{
SaveRewindState();
s_rewind_save_counter = s_rewind_save_frequency;
}
else
{
s_rewind_save_counter--;
}
}
else if (s_runahead_frames > 0)
{
// We don't want to poll during replay, because otherwise we'll lose frames.
if (s_runahead_replay_frames == 0)
{
// For runahead, poll input early, that way we can use the remainder of this frame to replay.
// *technically* this means higher input latency (by less than a frame), but runahead itself
// counter-acts that.
Host::PumpMessagesOnCPUThread();
if (IsExecutionInterrupted())
{
s_system_interrupted = false;
CPU::ExitExecution();
return;
}
}
if (DoRunahead())
{
// running ahead, get it done as soon as possible
return;
}
SaveRunaheadState();
}
const Common::Timer::Value current_time = Common::Timer::GetCurrentValue();
if (current_time < s_next_frame_time || s_display_all_frames || s_last_frame_skipped)
{
s_last_frame_skipped = false;
// TODO: Purge reset/restore
g_gpu->ResetGraphicsAPIState();
const bool skip_present = g_host_display->ShouldSkipDisplayingFrame();
Host::RenderDisplay(skip_present);
@ -1577,14 +1698,109 @@ void System::Execute()
s_presents_since_last_update++;
}
if (s_throttler_enabled)
System::Throttle();
// Update perf counters *after* throttling, we want to measure from start-of-frame
// to start-of-frame, not end-of-frame to end-of-frame (will be noisy due to different
// amounts of computation happening in each frame).
System::UpdatePerformanceCounters();
g_gpu->RestoreGraphicsAPIState();
}
else if (current_time >= s_next_frame_time)
{
Log_DebugPrintf("Skipping displaying frame");
s_last_frame_skipped = true;
}
if (s_throttler_enabled && !IsExecutionInterrupted())
Throttle();
// Input poll already done above
if (s_runahead_frames == 0)
{
Host::PumpMessagesOnCPUThread();
if (IsExecutionInterrupted())
{
s_system_interrupted = false;
CPU::ExitExecution();
return;
}
}
// Update perf counters *after* throttling, we want to measure from start-of-frame
// to start-of-frame, not end-of-frame to end-of-frame (will be noisy due to different
// amounts of computation happening in each frame).
System::UpdatePerformanceCounters();
}
/// Stores a new throttle frequency and recomputes the throttle period from it.
/// Does nothing when the requested frequency compares equal to the current one.
void System::SetThrottleFrequency(float frequency)
{
  const bool frequency_changed = !(s_throttle_frequency == frequency);
  if (frequency_changed)
  {
    s_throttle_frequency = frequency;
    UpdateThrottlePeriod();
  }
}
/// Recomputes s_frame_period from s_throttle_frequency and s_target_speed, then resets the throttler.
/// A target speed that is not greater than epsilon yields a frame period of 1 timer unit.
void System::UpdateThrottlePeriod()
{
  constexpr double min_speed = std::numeric_limits<double>::epsilon();
  const double requested_speed = static_cast<double>(s_target_speed);

  if (!(requested_speed > min_speed))
  {
    // No usable target speed: fall back to the smallest non-zero period.
    s_frame_period = 1;
  }
  else
  {
    // period (seconds) = 1 / (throttle frequency * speed multiplier)
    const double clamped_speed = std::max(requested_speed, min_speed);
    const double effective_frequency = static_cast<double>(s_throttle_frequency) * clamped_speed;
    s_frame_period = Common::Timer::ConvertSecondsToValue(1.0 / effective_frequency);
  }

  ResetThrottler();
}
/// Re-bases the throttler on the current time: the next frame becomes due one frame period from now.
void System::ResetThrottler()
{
s_next_frame_time = Common::Timer::GetCurrentValue() + s_frame_period;
}
void System::Throttle()
{
// If we're running too slow, advance the next frame time based on the time we lost. Effectively skips
// running those frames at the intended time, because otherwise if we pause in the debugger, we'll run
// hundreds of frames when we resume.
Common::Timer::Value current_time = Common::Timer::GetCurrentValue();
if (current_time > s_next_frame_time)
{
const Common::Timer::Value diff = static_cast<s64>(current_time) - static_cast<s64>(s_next_frame_time);
s_next_frame_time += (diff / s_frame_period) * s_frame_period + s_frame_period;
return;
}
// Use a spinwait if we undersleep for all platforms except android.. don't want to burn battery.
// Linux also seems to do a much better job of waking up at the requested time.
#if !defined(__linux__) && !defined(__ANDROID__)
Common::Timer::SleepUntil(s_next_frame_time, g_settings.display_all_frames);
#else
Common::Timer::SleepUntil(s_next_frame_time, false);
#endif
s_next_frame_time += s_frame_period;
}
/// Runs CPU::SingleStep() once, bracketed by the same graphics API state restore/reset and
/// s_system_executing bookkeeping that System::Execute() uses around CPU::Execute().
void System::SingleStepCPU()
{
s_frame_timer.Reset();
// Mark the system as executing for the duration of the step; ApplySettings(), SetRewinding() and
// ShutdownSystem() check this flag.
s_system_executing = true;
g_gpu->RestoreGraphicsAPIState();
CPU::SingleStep();
// Generate any pending SPU samples after the step.
SPU::GeneratePendingSamples();
g_gpu->ResetGraphicsAPIState();
s_system_executing = false;
}
/// Advances the internal frame counter (s_internal_frame_number) by one.
void System::IncrementInternalFrameNumber()
{
s_internal_frame_number++;
}
void System::RecreateSystem()
@ -2163,159 +2379,11 @@ bool System::InternalSaveState(ByteStream* state, u32 screenshot_size /* = 256 *
return true;
}
void System::SingleStepCPU()
{
const u32 old_frame_number = s_frame_number;
s_frame_timer.Reset();
g_gpu->RestoreGraphicsAPIState();
CPU::SingleStep();
SPU::GeneratePendingSamples();
if (s_frame_number != old_frame_number && s_cheat_list)
s_cheat_list->Apply();
g_gpu->ResetGraphicsAPIState();
}
void System::DoRunFrame()
{
g_gpu->RestoreGraphicsAPIState();
if (CPU::g_state.use_debug_dispatcher)
{
CPU::ExecuteDebug();
}
else
{
switch (g_settings.cpu_execution_mode)
{
case CPUExecutionMode::Recompiler:
#ifdef WITH_RECOMPILER
CPU::CodeCache::ExecuteRecompiler();
#else
CPU::CodeCache::Execute();
#endif
break;
case CPUExecutionMode::CachedInterpreter:
CPU::CodeCache::Execute();
break;
case CPUExecutionMode::Interpreter:
default:
CPU::Execute();
break;
}
}
// Generate any pending samples from the SPU before sleeping, this way we reduce the chances of underruns.
SPU::GeneratePendingSamples();
if (s_cheat_list)
s_cheat_list->Apply();
g_gpu->ResetGraphicsAPIState();
}
void System::RunFrame()
{
if (s_rewind_load_counter >= 0)
{
DoRewind();
return;
}
if (s_runahead_frames > 0)
DoRunahead();
DoRunFrame();
s_next_frame_time += s_frame_period;
if (s_memory_saves_enabled)
DoMemorySaveStates();
}
/// Returns the current target speed multiplier (s_target_speed), as used by UpdateThrottlePeriod().
float System::GetTargetSpeed()
{
return s_target_speed;
}
void System::SetThrottleFrequency(float frequency)
{
s_throttle_frequency = frequency;
UpdateThrottlePeriod();
}
void System::UpdateThrottlePeriod()
{
if (s_target_speed > std::numeric_limits<double>::epsilon())
{
const double target_speed = std::max(static_cast<double>(s_target_speed), std::numeric_limits<double>::epsilon());
s_frame_period =
Common::Timer::ConvertSecondsToValue(1.0 / (static_cast<double>(s_throttle_frequency) * target_speed));
}
else
{
s_frame_period = 1;
}
ResetThrottler();
}
void System::ResetThrottler()
{
s_next_frame_time = Common::Timer::GetCurrentValue();
}
void System::Throttle()
{
// If we're running too slow, advance the next frame time based on the time we lost. Effectively skips
// running those frames at the intended time, because otherwise if we pause in the debugger, we'll run
// hundreds of frames when we resume.
Common::Timer::Value current_time = Common::Timer::GetCurrentValue();
if (current_time > s_next_frame_time)
{
const Common::Timer::Value diff = static_cast<s64>(current_time) - static_cast<s64>(s_next_frame_time);
s_next_frame_time += (diff / s_frame_period) * s_frame_period;
return;
}
// Use a spinwait if we undersleep for all platforms except android.. don't want to burn battery.
// Linux also seems to do a much better job of waking up at the requested time.
#if !defined(__linux__) && !defined(__ANDROID__)
Common::Timer::SleepUntil(s_next_frame_time, g_settings.display_all_frames);
#else
Common::Timer::SleepUntil(s_next_frame_time, false);
#endif
}
void System::RunFrames()
{
// If we're running more than this in a single loop... we're in for a bad time.
const u32 max_frames_to_run = 2;
u32 frames_run = 0;
Common::Timer::Value value = Common::Timer::GetCurrentValue();
while (frames_run < max_frames_to_run)
{
if (value < s_next_frame_time)
break;
RunFrame();
frames_run++;
value = Common::Timer::GetCurrentValue();
}
if (frames_run != 1)
Log_VerbosePrintf("Ran %u frames in a single host frame", frames_run);
}
void System::UpdatePerformanceCounters()
{
const float frame_time = static_cast<float>(s_frame_timer.GetTimeMillisecondsAndReset());
@ -3625,18 +3693,22 @@ void System::SetRewinding(bool enabled)
{
if (enabled)
{
const bool was_enabled = IsRewinding();
// Try to rewind at the replay speed, or one per second maximum.
const float load_frequency = std::min(g_settings.rewind_save_frequency, 1.0f);
s_rewind_load_frequency = static_cast<s32>(std::ceil(load_frequency * s_throttle_frequency));
s_rewind_load_counter = 0;
if (!was_enabled && s_system_executing)
s_system_interrupted = true;
}
else
{
s_rewind_load_frequency = -1;
s_rewind_load_counter = -1;
s_rewinding_first_save = true;
}
s_rewinding_first_save = true;
}
void System::DoRewind()
@ -3655,6 +3727,15 @@ void System::DoRewind()
}
s_next_frame_time += s_frame_period;
// TODO: Purge reset/restore
g_gpu->ResetGraphicsAPIState();
Host::RenderDisplay(false);
g_gpu->RestoreGraphicsAPIState();
Host::PumpMessagesOnCPUThread();
Throttle();
}
void System::SaveRunaheadState()
@ -3676,84 +3757,70 @@ void System::SaveRunaheadState()
s_runahead_states.push_back(std::move(mss));
}
void System::DoRunahead()
bool System::DoRunahead()
{
#ifdef PROFILE_MEMORY_SAVE_STATES
Common::Timer timer;
Log_DevPrintf("runahead starting at frame %u", s_frame_number);
static Common::Timer replay_timer;
#endif
if (s_runahead_replay_pending)
{
#ifdef PROFILE_MEMORY_SAVE_STATES
Log_DevPrintf("runahead starting at frame %u", s_frame_number);
replay_timer.Reset();
#endif
// we need to replay and catch up - load the state,
s_runahead_replay_pending = false;
if (s_runahead_states.empty() || !LoadMemoryState(s_runahead_states.front()))
{
s_runahead_states.clear();
return;
return false;
}
// figure out how many frames we need to run to catch up
s_runahead_replay_frames = static_cast<u32>(s_runahead_states.size());
// and throw away all the states, forcing us to catch up below
// TODO: can we leave one frame here and run, avoiding the extra save?
s_runahead_states.clear();
#ifdef PROFILE_MEMORY_SAVE_STATES
Log_VerbosePrintf("Rewound to frame %u, took %.2f ms", s_frame_number, timer.GetTimeMilliseconds());
#endif
}
// run the frames with no audio
s32 frames_to_run = static_cast<s32>(s_runahead_frames) - static_cast<s32>(s_runahead_states.size());
if (frames_to_run > 0)
{
Common::Timer timer2;
#ifdef PROFILE_MEMORY_SAVE_STATES
const s32 temp = frames_to_run;
#endif
// run the frames with no audio
SPU::SetAudioOutputMuted(true);
while (frames_to_run > 0)
{
DoRunFrame();
SaveRunaheadState();
frames_to_run--;
}
SPU::SetAudioOutputMuted(false);
#ifdef PROFILE_MEMORY_SAVE_STATES
Log_VerbosePrintf("Running %d frames to catch up took %.2f ms", temp, timer2.GetTimeMilliseconds());
Log_VerbosePrintf("Rewound to frame %u, took %.2f ms", s_frame_number, replay_timer.GetTimeMilliseconds());
#endif
// we don't want to save the frame we just loaded. but we are "one frame ahead", because the frame we just tossed
// was never saved, so return but don't decrement the counter
return true;
}
else
else if (s_runahead_replay_frames == 0)
{
// save this frame
return false;
}
s_runahead_replay_frames--;
if (s_runahead_replay_frames > 0)
{
// keep running ahead
SaveRunaheadState();
return true;
}
#ifdef PROFILE_MEMORY_SAVE_STATES
Log_DevPrintf("runahead ending at frame %u, took %.2f ms", s_frame_number, timer.GetTimeMilliseconds());
Log_VerbosePrintf("Running %d frames to catch up took %.2f ms", s_runahead_frames,
replay_timer.GetTimeMilliseconds());
#endif
}
void System::DoMemorySaveStates()
{
if (s_rewind_save_counter >= 0)
{
if (s_rewind_save_counter == 0)
{
SaveRewindState();
s_rewind_save_counter = s_rewind_save_frequency;
}
else
{
s_rewind_save_counter--;
}
}
// we're all caught up. this frame gets saved in DoMemoryStates().
SPU::SetAudioOutputMuted(false);
if (s_runahead_frames > 0)
SaveRunaheadState();
#ifdef PROFILE_MEMORY_SAVE_STATES
Log_DevPrintf("runahead ending at frame %u, took %.2f ms", s_frame_number, replay_timer.GetTimeMilliseconds());
#endif
return false;
}
void System::SetRunaheadReplayFlag()
@ -3776,7 +3843,10 @@ void System::ShutdownSystem(bool save_resume_state)
if (save_resume_state)
SaveResumeState();
DestroySystem();
if (s_system_executing)
s_state = State::Stopping;
else
DestroySystem();
}
bool System::CanUndoLoadState()

View File

@ -42,6 +42,7 @@ struct SystemBootParameters
u32 media_playlist_index = 0;
bool load_image_to_ram = false;
bool force_software_renderer = false;
bool fast_forward_to_first_frame = false;
};
struct SaveStateInfo
@ -85,7 +86,8 @@ enum class State
Shutdown,
Starting,
Running,
Paused
Paused,
Stopping,
};
using GameHash = u64;
@ -110,7 +112,6 @@ ConsoleRegion GetConsoleRegionForDiscRegion(DiscRegion region);
std::string GetExecutableNameForImage(CDImage* cdi, bool strip_subdirectories);
bool ReadExecutableFromImage(CDImage* cdi, std::string* out_executable_name, std::vector<u8>* out_executable_data);
bool IsValidGameImage(CDImage* cdi);
std::string GetGameHashId(GameHash hash);
bool GetGameDetailsFromImage(CDImage* cdi, std::string* out_id, GameHash* out_hash);
DiscRegion GetRegionForSerial(std::string_view serial);
@ -129,6 +130,7 @@ std::string GetInputProfilePath(const std::string_view& name);
State GetState();
void SetState(State new_state);
bool IsRunning();
bool IsExecutionInterrupted();
bool IsPaused();
bool IsShutdown();
bool IsValid();
@ -176,14 +178,15 @@ bool InjectEXEFromBuffer(const void* buffer, u32 buffer_size, bool patch_loader
u32 GetFrameNumber();
u32 GetInternalFrameNumber();
void FrameDone();
void IncrementInternalFrameNumber();
void FrameDone();
const std::string& GetDiscPath();
const std::string& GetGameSerial();
const std::string& GetGameTitle();
GameHash GetGameHash();
bool IsRunningUnknownGame();
bool WasFastBooted();
const BIOS::ImageInfo* GetBIOSImageInfo();
const BIOS::Hash& GetBIOSHash();
@ -237,8 +240,6 @@ void RecreateSystem();
bool RecreateGPU(GPURenderer renderer, bool force_recreate_display = false, bool update_display = true);
void SingleStepCPU();
void RunFrame();
void RunFrames();
/// Sets target emulation speed.
float GetTargetSpeed();
@ -250,9 +251,6 @@ void SetThrottleFrequency(float frequency);
void UpdateThrottlePeriod();
void ResetThrottler();
/// Throttles the system, i.e. sleeps until it's time to execute the next frame.
void Throttle();
void UpdatePerformanceCounters();
void ResetPerformanceCounters();

View File

@ -17,6 +17,7 @@ static TimingEvent* s_active_events_tail;
static TimingEvent* s_current_event = nullptr;
static u32 s_active_event_count = 0;
static u32 s_global_tick_counter = 0;
static bool s_frame_done = false;
u32 GetGlobalTickCounter()
{
@ -51,10 +52,7 @@ std::unique_ptr<TimingEvent> CreateTimingEvent(std::string name, TickCount perio
void UpdateCPUDowncount()
{
if (!CPU::g_state.frame_done && (!CPU::HasPendingInterrupt() || CPU::g_using_interpreter))
{
CPU::g_state.downcount = s_active_events_head->GetDowncount();
}
CPU::g_state.downcount = CPU::HasPendingInterrupt() ? 0 : s_active_events_head->GetDowncount();
}
TimingEvent** GetHeadEventPtr()
@ -260,48 +258,76 @@ static TimingEvent* FindActiveEvent(const char* name)
return nullptr;
}
// Returns true while s_current_event is non-null. RunEvents() sets it to the event whose
// callback is being invoked and clears it once the pending events have been processed.
bool IsRunningEvents()
{
return (s_current_event != nullptr);
}
// Flags the current frame as finished. RunEvents() checks s_frame_done, clears it, and calls
// System::FrameDone().
void SetFrameDone()
{
s_frame_done = true;
// NOTE(review): zeroing the downcount presumably makes the CPU return to RunEvents() as soon as
// possible so the flag is noticed - confirm against the CPU dispatcher.
CPU::g_state.downcount = 0;
}
// Consumes the CPU's pending ticks, subtracts the elapsed time from every active event, and invokes the
// callback of each event whose downcount has dropped to zero or below. Also services a frame-done request
// and refreshes the CPU downcount before deciding whether to loop again.
// NOTE(review): this span appears to contain two interleaved versions of the loop (an older single
// `while (pending_ticks > 0)` pass and a newer `do { ... } while (...)` form), which is why several
// statements occur twice. Confirm which lines are meant to remain before relying on the flow below.
void RunEvents()
{
// Must not be entered while a callback is already being dispatched (s_current_event is set during dispatch).
DebugAssert(!s_current_event);
TickCount pending_ticks = CPU::GetPendingTicks();
CPU::ResetPendingTicks();
while (pending_ticks > 0)
do
{
const TickCount time = std::min(pending_ticks, s_active_events_head->GetDowncount());
s_global_tick_counter += static_cast<u32>(time);
pending_ticks -= time;
if (CPU::HasPendingInterrupt())
CPU::DispatchInterrupt();
// Apply downcount to all events.
// This will result in a negative downcount for those events which are late.
for (TimingEvent* event = s_active_events_head; event; event = event->next)
TickCount pending_ticks = CPU::GetPendingTicks();
// Only process events when the pending ticks reach the head event's downcount.
if (pending_ticks >= s_active_events_head->GetDowncount())
{
event->m_downcount -= time;
event->m_time_since_last_run += time;
CPU::ResetPendingTicks();
do
{
// Step no further than the head event's downcount in a single slice.
const TickCount time = std::min(pending_ticks, s_active_events_head->GetDowncount());
s_global_tick_counter += static_cast<u32>(time);
pending_ticks -= time;
// Apply downcount to all events.
// This will result in a negative downcount for those events which are late.
for (TimingEvent* event = s_active_events_head; event; event = event->next)
{
event->m_downcount -= time;
event->m_time_since_last_run += time;
}
// Now we can actually run the callbacks.
while (s_active_events_head->m_downcount <= 0)
{
// move it to the end, since that'll likely be its new position
TimingEvent* event = s_active_events_head;
s_current_event = event;
// Factor late time into the time for the next invocation.
const TickCount ticks_late = -event->m_downcount;
const TickCount ticks_to_execute = event->m_time_since_last_run;
event->m_downcount += event->m_interval;
event->m_time_since_last_run = 0;
// The cycles_late is only an indicator, it doesn't modify the cycles to execute.
event->m_callback(event->m_callback_param, ticks_to_execute, ticks_late);
// Re-sort only if the event is still marked active after its callback ran.
if (event->m_active)
SortEvent(event);
}
} while (pending_ticks > 0);
// Dispatch finished; clear the marker that IsRunningEvents() reports on.
s_current_event = nullptr;
}
// Now we can actually run the callbacks.
while (s_active_events_head->m_downcount <= 0)
// A frame-done request raised by SetFrameDone() is consumed here: the flag is cleared, then System is notified.
if (s_frame_done)
{
// move it to the end, since that'll likely be its new position
TimingEvent* event = s_active_events_head;
s_current_event = event;
// Factor late time into the time for the next invocation.
const TickCount ticks_late = -event->m_downcount;
const TickCount ticks_to_execute = event->m_time_since_last_run;
event->m_downcount += event->m_interval;
event->m_time_since_last_run = 0;
// The cycles_late is only an indicator, it doesn't modify the cycles to execute.
event->m_callback(event->m_callback_param, ticks_to_execute, ticks_late);
if (event->m_active)
SortEvent(event);
s_frame_done = false;
System::FrameDone();
}
}
s_current_event = nullptr;
UpdateCPUDowncount();
UpdateCPUDowncount();
// Go around again while the CPU's pending ticks still meet or exceed the refreshed downcount.
} while (CPU::GetPendingTicks() >= CPU::g_state.downcount);
}
bool DoState(StateWrapper& sw)
@ -347,7 +373,7 @@ bool DoState(StateWrapper& sw)
sw.Do(&last_event_run_time);
}
Log_DevPrintf("Loaded %u events from save state.", event_count);
Log_DebugPrintf("Loaded %u events from save state.", event_count);
SortEvents();
}
else
@ -364,7 +390,7 @@ bool DoState(StateWrapper& sw)
sw.Do(&event->m_interval);
}
Log_DevPrintf("Wrote %u events to save state.", s_active_event_count);
Log_DebugPrintf("Wrote %u events to save state.", s_active_event_count);
}
return !sw.HasError();
@ -407,6 +433,8 @@ void TimingEvent::Delay(TickCount ticks)
DebugAssert(TimingEvents::s_current_event != this);
TimingEvents::SortEvent(this);
if (TimingEvents::s_active_events_head == this)
TimingEvents::UpdateCPUDowncount();
}
void TimingEvent::Schedule(TickCount ticks)
@ -426,7 +454,11 @@ void TimingEvent::Schedule(TickCount ticks)
// Event is already active, so we leave the time since last run alone, and just modify the downcount.
// If this is a call from an IO handler for example, re-sort the event queue.
if (TimingEvents::s_current_event != this)
{
TimingEvents::SortEvent(this);
if (TimingEvents::s_active_events_head == this)
TimingEvents::UpdateCPUDowncount();
}
}
}
@ -451,7 +483,11 @@ void TimingEvent::Reset()
m_downcount = m_interval;
m_time_since_last_run = 0;
if (TimingEvents::s_current_event != this)
{
TimingEvents::SortEvent(this);
if (TimingEvents::s_active_events_head == this)
TimingEvents::UpdateCPUDowncount();
}
}
void TimingEvent::InvokeEarly(bool force /* = false */)
@ -471,6 +507,8 @@ void TimingEvent::InvokeEarly(bool force /* = false */)
// Since we've changed the downcount, we need to re-sort the events.
DebugAssert(TimingEvents::s_current_event != this);
TimingEvents::SortEvent(this);
if (TimingEvents::s_active_events_head == this)
TimingEvents::UpdateCPUDowncount();
}
void TimingEvent::Activate()

View File

@ -93,6 +93,8 @@ std::unique_ptr<TimingEvent> CreateTimingEvent(std::string name, TickCount perio
/// Serialization.
bool DoState(StateWrapper& sw);
/// Returns true while RunEvents() is in the middle of invoking an event callback.
bool IsRunningEvents();
/// Flags the current frame as finished and zeroes the CPU downcount.
void SetFrameDone();
/// Consumes the CPU's pending ticks and runs the callbacks of any events that have come due.
void RunEvents();
/// Recomputes the CPU downcount from the pending-interrupt state and the head active event.
void UpdateCPUDowncount();