CPU/NewRec: Fix PGXP desync

Wobbly geometry in Threads of Fate.
This commit is contained in:
Stenzek
2024-03-30 00:04:35 +10:00
parent 979b7bd36f
commit a3013efbca
5 changed files with 221 additions and 124 deletions

View File

@ -1,4 +1,4 @@
// SPDX-FileCopyrightText: 2023 Connor McLaughlin <stenzek@gmail.com>
// SPDX-FileCopyrightText: 2024 Connor McLaughlin <stenzek@gmail.com>
// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)
#include "cpu_newrec_compiler_aarch64.h"
@ -1616,9 +1616,9 @@ void CPU::NewRec::AArch64Compiler::Compile_lwx(CompileFlags cf, MemoryAccessSize
{
// const u32 mask = UINT32_C(0x00FFFFFF) >> shift;
// new_value = (value & mask) | (RWRET << (24 - shift));
EmitMov(addr, 0xFFFFFFu);
armAsm->lsrv(addr, addr, RWARG2);
armAsm->and_(value, value, addr);
EmitMov(RWARG4, 0xFFFFFFu);
armAsm->lsrv(RWARG4, RWARG4, RWARG2);
armAsm->and_(value, value, RWARG4);
armAsm->lslv(RWRET, RWRET, RWARG3);
armAsm->orr(value, value, RWRET);
}
@ -1627,27 +1627,40 @@ void CPU::NewRec::AArch64Compiler::Compile_lwx(CompileFlags cf, MemoryAccessSize
// const u32 mask = UINT32_C(0xFFFFFF00) << (24 - shift);
// new_value = (value & mask) | (RWRET >> shift);
armAsm->lsrv(RWRET, RWRET, RWARG2);
EmitMov(addr, 0xFFFFFF00u);
armAsm->lslv(addr, addr, RWARG3);
armAsm->and_(value, value, addr);
EmitMov(RWARG4, 0xFFFFFF00u);
armAsm->lslv(RWARG4, RWARG4, RWARG3);
armAsm->and_(value, value, RWARG4);
armAsm->orr(value, value, RWRET);
}
FreeHostReg(addr.GetCode());
if (g_settings.gpu_pgxp_enable)
{
Flush(FLUSH_FOR_C_CALL);
armAsm->mov(RWARG3, value);
armAsm->and_(RWARG2, addr, armCheckLogicalConstant(~0x3u));
EmitMov(RWARG1, inst->bits);
EmitCall(reinterpret_cast<const void*>(&PGXP::CPU_LW));
}
}
void CPU::NewRec::AArch64Compiler::Compile_lwc2(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
const std::optional<VirtualMemoryAddress>& address)
{
const u32 index = static_cast<u32>(inst->r.rt.GetValue());
const auto [ptr, action] = GetGTERegisterPointer(index, true);
const std::optional<WRegister> addr_reg =
g_settings.gpu_pgxp_enable ? std::optional<WRegister>(WRegister(AllocateTempHostReg(HR_CALLEE_SAVED))) :
std::optional<WRegister>();
FlushForLoadStore(address, false, use_fastmem);
const WRegister addr = ComputeLoadStoreAddressArg(cf, address, addr_reg);
GenerateLoad(addr, MemoryAccessSize::Word, false, use_fastmem, []() { return RWRET; });
const WRegister value = GenerateLoad(addr, MemoryAccessSize::Word, false, use_fastmem, [this, action]() {
return (action == GTERegisterAccessAction::CallHandler && g_settings.gpu_pgxp_enable) ?
WRegister(AllocateTempHostReg(HR_CALLEE_SAVED)) :
RWRET;
});
const u32 index = static_cast<u32>(inst->r.rt.GetValue());
const auto [ptr, action] = GetGTERegisterPointer(index, true);
switch (action)
{
case GTERegisterAccessAction::Ignore:
@ -1657,28 +1670,28 @@ void CPU::NewRec::AArch64Compiler::Compile_lwc2(CompileFlags cf, MemoryAccessSiz
case GTERegisterAccessAction::Direct:
{
armAsm->str(RWRET, PTR(ptr));
armAsm->str(value, PTR(ptr));
break;
}
case GTERegisterAccessAction::SignExtend16:
{
armAsm->sxth(RWRET, RWRET);
armAsm->str(RWRET, PTR(ptr));
armAsm->sxth(RWARG3, value);
armAsm->str(RWARG3, PTR(ptr));
break;
}
case GTERegisterAccessAction::ZeroExtend16:
{
armAsm->uxth(RWRET, RWRET);
armAsm->str(RWRET, PTR(ptr));
armAsm->uxth(RWARG3, value);
armAsm->str(RWARG3, PTR(ptr));
break;
}
case GTERegisterAccessAction::CallHandler:
{
Flush(FLUSH_FOR_C_CALL);
armAsm->mov(RWARG2, RWRET);
armAsm->mov(RWARG2, value);
EmitMov(RWARG1, index);
EmitCall(reinterpret_cast<const void*>(&GTE::WriteRegister));
break;
@ -1689,12 +1702,12 @@ void CPU::NewRec::AArch64Compiler::Compile_lwc2(CompileFlags cf, MemoryAccessSiz
// SXY0 <- SXY1
// SXY1 <- SXY2
// SXY2 <- SXYP
DebugAssert(RWRET.GetCode() != RWARG2.GetCode() && RWRET.GetCode() != RWARG3.GetCode());
DebugAssert(value.GetCode() != RWARG2.GetCode() && value.GetCode() != RWARG3.GetCode());
armAsm->ldr(RWARG2, PTR(&g_state.gte_regs.SXY1[0]));
armAsm->ldr(RWARG3, PTR(&g_state.gte_regs.SXY2[0]));
armAsm->str(RWARG2, PTR(&g_state.gte_regs.SXY0[0]));
armAsm->str(RWARG3, PTR(&g_state.gte_regs.SXY1[0]));
armAsm->str(RWRET, PTR(&g_state.gte_regs.SXY2[0]));
armAsm->str(value, PTR(&g_state.gte_regs.SXY2[0]));
break;
}
@ -1708,11 +1721,13 @@ void CPU::NewRec::AArch64Compiler::Compile_lwc2(CompileFlags cf, MemoryAccessSiz
if (g_settings.gpu_pgxp_enable)
{
Flush(FLUSH_FOR_C_CALL);
armAsm->mov(RWARG3, RWRET);
armAsm->mov(RWARG3, value);
if (value.GetCode() != RWRET.GetCode())
FreeHostReg(value.GetCode());
armAsm->mov(RWARG2, addr);
FreeHostReg(addr_reg.value().GetCode());
EmitMov(RWARG1, inst->bits);
EmitCall(reinterpret_cast<const void*>(&PGXP::CPU_LWC2));
FreeHostReg(addr_reg.value().GetCode());
}
}
@ -1761,17 +1776,18 @@ void CPU::NewRec::AArch64Compiler::Compile_swx(CompileFlags cf, MemoryAccessSize
// TODO: this can take over rt's value if it's no longer needed
// NOTE: can't trust T in cf because of the flush
const Reg rt = inst->r.rt;
const WRegister value = RWARG2;
if (const std::optional<u32> rtreg = CheckHostReg(HR_MODE_READ, HR_TYPE_CPU_REG, rt); rtreg.has_value())
armAsm->mov(value, WRegister(rtreg.value()));
else if (HasConstantReg(rt))
EmitMov(value, GetConstantRegU32(rt));
else
armAsm->ldr(value, MipsPtr(rt));
const WRegister value = g_settings.gpu_pgxp_enable ? WRegister(AllocateTempHostReg(HR_CALLEE_SAVED)) : RWARG2;
MoveMIPSRegToReg(value, rt);
armAsm->and_(RWSCRATCH, addr, 3);
armAsm->lsl(RWSCRATCH, RWSCRATCH, 3); // *8
// Don't need the original address anymore.
if (!g_settings.gpu_pgxp_enable)
FreeHostReg(addr.GetCode());
else
armAsm->and_(addr, addr, armCheckLogicalConstant(~0x3u));
if (inst->op == InstructionOp::swl)
{
// const u32 mem_mask = UINT32_C(0xFFFFFF00) << shift;
@ -1801,8 +1817,18 @@ void CPU::NewRec::AArch64Compiler::Compile_swx(CompileFlags cf, MemoryAccessSize
FreeHostReg(addr.GetCode());
armAsm->and_(RWARG1, addr, armCheckLogicalConstant(~0x3u));
GenerateStore(RWARG1, value, MemoryAccessSize::Word, use_fastmem);
GenerateStore(addr, value, MemoryAccessSize::Word, use_fastmem);
if (g_settings.gpu_pgxp_enable)
{
Flush(FLUSH_FOR_C_CALL);
armAsm->mov(RWARG3, value);
FreeHostReg(value.GetCode());
armAsm->mov(RWARG2, addr);
FreeHostReg(addr.GetCode());
EmitMov(RWARG1, inst->bits);
EmitCall(reinterpret_cast<const void*>(&PGXP::CPU_SW));
}
}
void CPU::NewRec::AArch64Compiler::Compile_swc2(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,