System: Refactor main loop

Reduces JIT exits.
Improves runahead performance.
This commit is contained in:
Stenzek
2023-08-15 23:12:21 +10:00
parent 4ebd34fcb3
commit 5b980dafa5
43 changed files with 1343 additions and 923 deletions

View File

@ -4,6 +4,7 @@
#include "cpu_core.h"
#include "bus.h"
#include "common/align.h"
#include "common/fastjmp.h"
#include "common/file_system.h"
#include "common/log.h"
#include "cpu_core_private.h"
@ -29,9 +30,10 @@ static void Branch(u32 target);
static void FlushPipeline();
State g_state;
bool g_using_interpreter = false;
bool TRACE_EXECUTION = false;
static fastjmp_buf s_jmp_buf;
static std::FILE* s_log_file = nullptr;
static bool s_log_file_opened = false;
static bool s_trace_to_log = false;
@ -41,6 +43,7 @@ static std::vector<Breakpoint> s_breakpoints;
static u32 s_breakpoint_counter = 1;
static u32 s_last_breakpoint_check_pc = INVALID_BREAKPOINT_PC;
static bool s_single_step = false;
static bool s_single_step_done = false;
bool IsTraceEnabled()
{
@ -134,6 +137,7 @@ void Reset()
GTE::Reset();
// TODO: This consumes cycles...
SetPC(RESET_VECTOR);
}
@ -141,7 +145,9 @@ bool DoState(StateWrapper& sw)
{
sw.Do(&g_state.pending_ticks);
sw.Do(&g_state.downcount);
sw.DoArray(g_state.regs.r, countof(g_state.regs.r));
sw.DoArray(g_state.regs.r, static_cast<u32>(Reg::count));
sw.Do(&g_state.pc);
sw.Do(&g_state.npc);
sw.Do(&g_state.cop0_regs.BPC);
sw.Do(&g_state.cop0_regs.BDA);
sw.Do(&g_state.cop0_regs.TAR);
@ -161,11 +167,23 @@ bool DoState(StateWrapper& sw)
sw.Do(&g_state.next_instruction_is_branch_delay_slot);
sw.Do(&g_state.branch_was_taken);
sw.Do(&g_state.exception_raised);
sw.Do(&g_state.interrupt_delay);
if (sw.GetVersion() < 59)
{
bool interrupt_delay;
sw.Do(&interrupt_delay);
}
sw.Do(&g_state.load_delay_reg);
sw.Do(&g_state.load_delay_value);
sw.Do(&g_state.next_load_delay_reg);
sw.Do(&g_state.next_load_delay_value);
// Compatibility with old states.
if (sw.GetVersion() < 59)
{
g_state.load_delay_reg = static_cast<Reg>(std::min(static_cast<u8>(g_state.load_delay_reg), static_cast<u8>(Reg::count)));
g_state.next_load_delay_reg = static_cast<Reg>(std::min(static_cast<u8>(g_state.load_delay_reg), static_cast<u8>(Reg::count)));
}
sw.Do(&g_state.cache_control.bits);
sw.DoBytes(g_state.dcache.data(), g_state.dcache.size());
@ -203,7 +221,7 @@ void UpdateFastmemBase()
ALWAYS_INLINE_RELEASE void SetPC(u32 new_pc)
{
DebugAssert(Common::IsAlignedPow2(new_pc, 4));
g_state.regs.npc = new_pc;
g_state.npc = new_pc;
FlushPipeline();
}
@ -217,7 +235,7 @@ ALWAYS_INLINE_RELEASE void Branch(u32 target)
return;
}
g_state.regs.npc = target;
g_state.npc = target;
g_state.branch_was_taken = true;
}
@ -257,14 +275,14 @@ ALWAYS_INLINE_RELEASE static void RaiseException(u32 CAUSE_bits, u32 EPC, u32 ve
// TAR is set to the address which was being fetched in this instruction, or the next instruction to execute if the
// exception hadn't occurred in the delay slot.
g_state.cop0_regs.EPC -= UINT32_C(4);
g_state.cop0_regs.TAR = g_state.regs.pc;
g_state.cop0_regs.TAR = g_state.pc;
}
// current -> previous, switch to kernel mode and disable interrupts
g_state.cop0_regs.sr.mode_bits <<= 2;
// flush the pipeline - we don't want to execute the previously fetched instruction
g_state.regs.npc = vector;
g_state.npc = vector;
g_state.exception_raised = true;
FlushPipeline();
}
@ -299,7 +317,7 @@ void RaiseBreakException(u32 CAUSE_bits, u32 EPC, u32 instruction_bits)
if (PCDrv::HandleSyscall(instruction_bits, g_state.regs))
{
// immediately return
g_state.regs.npc = EPC + 4;
g_state.npc = EPC + 4;
FlushPipeline();
return;
}
@ -311,16 +329,7 @@ void RaiseBreakException(u32 CAUSE_bits, u32 EPC, u32 instruction_bits)
void SetExternalInterrupt(u8 bit)
{
g_state.cop0_regs.cause.Ip |= static_cast<u8>(1u << bit);
if (g_settings.cpu_execution_mode == CPUExecutionMode::Interpreter)
{
g_state.interrupt_delay = 1;
}
else
{
g_state.interrupt_delay = 0;
CheckForPendingInterrupt();
}
CheckForPendingInterrupt();
}
void ClearExternalInterrupt(u8 bit)
@ -331,9 +340,7 @@ void ClearExternalInterrupt(u8 bit)
ALWAYS_INLINE_RELEASE static void UpdateLoadDelay()
{
// the old value is needed in case the delay slot instruction overwrites the same register
if (g_state.load_delay_reg != Reg::count)
g_state.regs.r[static_cast<u8>(g_state.load_delay_reg)] = g_state.load_delay_value;
g_state.regs.r[static_cast<u8>(g_state.load_delay_reg)] = g_state.load_delay_value;
g_state.load_delay_reg = g_state.next_load_delay_reg;
g_state.load_delay_value = g_state.next_load_delay_value;
g_state.next_load_delay_reg = Reg::count;
@ -343,16 +350,13 @@ ALWAYS_INLINE_RELEASE static void FlushPipeline()
{
// loads are flushed
g_state.next_load_delay_reg = Reg::count;
if (g_state.load_delay_reg != Reg::count)
{
g_state.regs.r[static_cast<u8>(g_state.load_delay_reg)] = g_state.load_delay_value;
g_state.load_delay_reg = Reg::count;
}
g_state.regs.r[static_cast<u8>(g_state.load_delay_reg)] = g_state.load_delay_value;
g_state.load_delay_reg = Reg::count;
// not in a branch delay slot
g_state.branch_was_taken = false;
g_state.next_instruction_is_branch_delay_slot = false;
g_state.current_instruction_pc = g_state.regs.pc;
g_state.current_instruction_pc = g_state.pc;
// prefetch the next instruction
FetchInstruction();
@ -649,8 +653,8 @@ const std::array<DebuggerRegisterListEntry, NUM_DEBUGGER_REGISTER_LIST_ENTRIES>
{"ra", &CPU::g_state.regs.ra},
{"hi", &CPU::g_state.regs.hi},
{"lo", &CPU::g_state.regs.lo},
{"pc", &CPU::g_state.regs.pc},
{"npc", &CPU::g_state.regs.npc},
{"pc", &CPU::g_state.pc},
{"npc", &CPU::g_state.npc},
{"COP0_SR", &CPU::g_state.cop0_regs.sr.bits},
{"COP0_CAUSE", &CPU::g_state.cop0_regs.cause.bits},
@ -1111,7 +1115,7 @@ restart_instruction:
{
g_state.next_instruction_is_branch_delay_slot = true;
const u32 target = ReadReg(inst.r.rs);
WriteReg(inst.r.rd, g_state.regs.npc);
WriteReg(inst.r.rd, g_state.npc);
Branch(target);
}
break;
@ -1267,7 +1271,7 @@ restart_instruction:
WriteRegDelayed(inst.i.rt, sxvalue);
if constexpr (pgxp_mode >= PGXPMode::Memory)
PGXP::CPU_LBx(inst.bits, sxvalue, addr);
PGXP::CPU_LBx(inst.bits, addr, sxvalue);
}
break;
@ -1285,7 +1289,7 @@ restart_instruction:
WriteRegDelayed(inst.i.rt, sxvalue);
if constexpr (pgxp_mode >= PGXPMode::Memory)
PGXP::CPU_LHx(inst.bits, sxvalue, addr);
PGXP::CPU_LHx(inst.bits, addr, sxvalue);
}
break;
@ -1302,7 +1306,7 @@ restart_instruction:
WriteRegDelayed(inst.i.rt, value);
if constexpr (pgxp_mode >= PGXPMode::Memory)
PGXP::CPU_LW(inst.bits, value, addr);
PGXP::CPU_LW(inst.bits, addr, value);
}
break;
@ -1320,7 +1324,7 @@ restart_instruction:
WriteRegDelayed(inst.i.rt, zxvalue);
if constexpr (pgxp_mode >= PGXPMode::Memory)
PGXP::CPU_LBx(inst.bits, zxvalue, addr);
PGXP::CPU_LBx(inst.bits, addr, zxvalue);
}
break;
@ -1338,7 +1342,7 @@ restart_instruction:
WriteRegDelayed(inst.i.rt, zxvalue);
if constexpr (pgxp_mode >= PGXPMode::Memory)
PGXP::CPU_LHx(inst.bits, zxvalue, addr);
PGXP::CPU_LHx(inst.bits, addr, zxvalue);
}
break;
@ -1372,7 +1376,7 @@ restart_instruction:
WriteRegDelayed(inst.i.rt, new_value);
if constexpr (pgxp_mode >= PGXPMode::Memory)
PGXP::CPU_LW(inst.bits, new_value, addr);
PGXP::CPU_LW(inst.bits, addr, new_value);
}
break;
@ -1386,7 +1390,7 @@ restart_instruction:
WriteMemoryByte(addr, value);
if constexpr (pgxp_mode >= PGXPMode::Memory)
PGXP::CPU_SB(inst.bits, Truncate8(value), addr);
PGXP::CPU_SB(inst.bits, addr, value);
}
break;
@ -1400,7 +1404,7 @@ restart_instruction:
WriteMemoryHalfWord(addr, value);
if constexpr (pgxp_mode >= PGXPMode::Memory)
PGXP::CPU_SH(inst.bits, Truncate16(value), addr);
PGXP::CPU_SH(inst.bits, addr, value);
}
break;
@ -1414,7 +1418,7 @@ restart_instruction:
WriteMemoryWord(addr, value);
if constexpr (pgxp_mode >= PGXPMode::Memory)
PGXP::CPU_SW(inst.bits, value, addr);
PGXP::CPU_SW(inst.bits, addr, value);
}
break;
@ -1447,22 +1451,22 @@ restart_instruction:
WriteMemoryWord(aligned_addr, new_value);
if constexpr (pgxp_mode >= PGXPMode::Memory)
PGXP::CPU_SW(inst.bits, new_value, addr);
PGXP::CPU_SW(inst.bits, aligned_addr, new_value);
}
break;
case InstructionOp::j:
{
g_state.next_instruction_is_branch_delay_slot = true;
Branch((g_state.regs.pc & UINT32_C(0xF0000000)) | (inst.j.target << 2));
Branch((g_state.pc & UINT32_C(0xF0000000)) | (inst.j.target << 2));
}
break;
case InstructionOp::jal:
{
WriteReg(Reg::ra, g_state.regs.npc);
WriteReg(Reg::ra, g_state.npc);
g_state.next_instruction_is_branch_delay_slot = true;
Branch((g_state.regs.pc & UINT32_C(0xF0000000)) | (inst.j.target << 2));
Branch((g_state.pc & UINT32_C(0xF0000000)) | (inst.j.target << 2));
}
break;
@ -1472,7 +1476,7 @@ restart_instruction:
g_state.next_instruction_is_branch_delay_slot = true;
const bool branch = (ReadReg(inst.i.rs) == ReadReg(inst.i.rt));
if (branch)
Branch(g_state.regs.pc + (inst.i.imm_sext32() << 2));
Branch(g_state.pc + (inst.i.imm_sext32() << 2));
}
break;
@ -1481,7 +1485,7 @@ restart_instruction:
g_state.next_instruction_is_branch_delay_slot = true;
const bool branch = (ReadReg(inst.i.rs) != ReadReg(inst.i.rt));
if (branch)
Branch(g_state.regs.pc + (inst.i.imm_sext32() << 2));
Branch(g_state.pc + (inst.i.imm_sext32() << 2));
}
break;
@ -1490,7 +1494,7 @@ restart_instruction:
g_state.next_instruction_is_branch_delay_slot = true;
const bool branch = (static_cast<s32>(ReadReg(inst.i.rs)) > 0);
if (branch)
Branch(g_state.regs.pc + (inst.i.imm_sext32() << 2));
Branch(g_state.pc + (inst.i.imm_sext32() << 2));
}
break;
@ -1499,7 +1503,7 @@ restart_instruction:
g_state.next_instruction_is_branch_delay_slot = true;
const bool branch = (static_cast<s32>(ReadReg(inst.i.rs)) <= 0);
if (branch)
Branch(g_state.regs.pc + (inst.i.imm_sext32() << 2));
Branch(g_state.pc + (inst.i.imm_sext32() << 2));
}
break;
@ -1515,10 +1519,10 @@ restart_instruction:
// register is still linked even if the branch isn't taken
const bool link = (rt & u8(0x1E)) == u8(0x10);
if (link)
WriteReg(Reg::ra, g_state.regs.npc);
WriteReg(Reg::ra, g_state.npc);
if (branch)
Branch(g_state.regs.pc + (inst.i.imm_sext32() << 2));
Branch(g_state.pc + (inst.i.imm_sext32() << 2));
}
break;
@ -1610,7 +1614,7 @@ restart_instruction:
WriteRegDelayed(inst.r.rt, value);
if constexpr (pgxp_mode >= PGXPMode::Memory)
PGXP::CPU_CFC2(inst.bits, value, value);
PGXP::CPU_MFC2(inst.bits, value);
}
break;
@ -1620,7 +1624,7 @@ restart_instruction:
GTE::WriteRegister(static_cast<u32>(inst.r.rd.GetValue()) + 32, value);
if constexpr (pgxp_mode >= PGXPMode::Memory)
PGXP::CPU_CTC2(inst.bits, value, value);
PGXP::CPU_MTC2(inst.bits, value);
}
break;
@ -1630,7 +1634,7 @@ restart_instruction:
WriteRegDelayed(inst.r.rt, value);
if constexpr (pgxp_mode >= PGXPMode::Memory)
PGXP::CPU_MFC2(inst.bits, value, value);
PGXP::CPU_MFC2(inst.bits, value);
}
break;
@ -1640,7 +1644,7 @@ restart_instruction:
GTE::WriteRegister(static_cast<u32>(inst.r.rd.GetValue()), value);
if constexpr (pgxp_mode >= PGXPMode::Memory)
PGXP::CPU_MTC2(inst.bits, value, value);
PGXP::CPU_MTC2(inst.bits, value);
}
break;
@ -1674,7 +1678,7 @@ restart_instruction:
GTE::WriteRegister(ZeroExtend32(static_cast<u8>(inst.i.rt.GetValue())), value);
if constexpr (pgxp_mode >= PGXPMode::Memory)
PGXP::CPU_LWC2(inst.bits, value, addr);
PGXP::CPU_LWC2(inst.bits, addr, value);
}
break;
@ -1694,7 +1698,7 @@ restart_instruction:
WriteMemoryWord(addr, value);
if constexpr (pgxp_mode >= PGXPMode::Memory)
PGXP::CPU_SWC2(inst.bits, value, addr);
PGXP::CPU_SWC2(inst.bits, addr, value);
}
break;
@ -1734,7 +1738,7 @@ void DispatchInterrupt()
{
// If the instruction we're about to execute is a GTE instruction, delay dispatching the interrupt until the next
// instruction. For some reason, if we don't do this, we end up with incorrectly sorted polygons and flickering..
SafeReadInstruction(g_state.regs.pc, &g_state.next_instruction.bits);
SafeReadInstruction(g_state.pc, &g_state.next_instruction.bits);
if (g_state.next_instruction.op == InstructionOp::cop2 && !g_state.next_instruction.cop.IsCommonInstruction())
{
StallUntilGTEComplete();
@ -1745,7 +1749,10 @@ void DispatchInterrupt()
RaiseException(
Cop0Registers::CAUSE::MakeValueForException(Exception::INT, g_state.next_instruction_is_branch_delay_slot,
g_state.branch_was_taken, g_state.next_instruction.cop.cop_n),
g_state.regs.pc);
g_state.pc);
// Fix up downcount, the pending IRQ set it to zero.
TimingEvents::UpdateCPUDowncount();
}
void UpdateDebugDispatcherFlag()
@ -1763,14 +1770,16 @@ void UpdateDebugDispatcherFlag()
Log_DevPrintf("%s debug dispatcher", use_debug_dispatcher ? "Now using" : "No longer using");
g_state.use_debug_dispatcher = use_debug_dispatcher;
ForceDispatcherExit();
ExitExecution();
}
void ForceDispatcherExit()
void ExitExecution()
{
// zero the downcount so we break out and switch
g_state.downcount = 0;
g_state.frame_done = true;
// can't exit while running events without messing things up
if (TimingEvents::IsRunningEvents())
TimingEvents::SetFrameDone();
else
fastjmp_jmp(&s_jmp_buf, 1);
}
bool HasAnyBreakpoints()
@ -1869,7 +1878,7 @@ void ClearBreakpoints()
bool AddStepOverBreakpoint()
{
u32 bp_pc = g_state.regs.pc;
u32 bp_pc = g_state.pc;
Instruction inst;
if (!SafeReadInstruction(bp_pc, &inst.bits))
@ -1880,7 +1889,7 @@ bool AddStepOverBreakpoint()
if (!IsCallInstruction(inst))
{
Host::ReportFormattedDebuggerMessage(Host::TranslateString("DebuggerMessage", "0x%08X is not a call instruction."),
g_state.regs.pc);
g_state.pc);
return false;
}
@ -1890,7 +1899,7 @@ bool AddStepOverBreakpoint()
if (IsBranchInstruction(inst))
{
Host::ReportFormattedDebuggerMessage(
Host::TranslateString("DebuggerMessage", "Can't step over double branch at 0x%08X"), g_state.regs.pc);
Host::TranslateString("DebuggerMessage", "Can't step over double branch at 0x%08X"), g_state.pc);
return false;
}
@ -1905,7 +1914,7 @@ bool AddStepOverBreakpoint()
bool AddStepOutBreakpoint(u32 max_instructions_to_search)
{
// find the branch-to-ra instruction.
u32 ret_pc = g_state.regs.pc;
u32 ret_pc = g_state.pc;
for (u32 i = 0; i < max_instructions_to_search; i++)
{
ret_pc += sizeof(Instruction);
@ -1929,21 +1938,24 @@ bool AddStepOutBreakpoint(u32 max_instructions_to_search)
Host::ReportFormattedDebuggerMessage(
Host::TranslateString("DebuggerMessage", "No return instruction found after %u instructions for step-out at %08X."),
max_instructions_to_search, g_state.regs.pc);
max_instructions_to_search, g_state.pc);
return false;
}
ALWAYS_INLINE_RELEASE static bool BreakpointCheck()
{
const u32 pc = g_state.regs.pc;
const u32 pc = g_state.pc;
// single step - we want to break out after this instruction, so set a pending exit
// the bp check happens just before execution, so this is fine
if (s_single_step)
{
ForceDispatcherExit();
s_single_step = false;
if (s_single_step_done)
ExitExecution();
else
s_single_step_done = true;
s_last_breakpoint_check_pc = pc;
return false;
}
@ -2004,19 +2016,14 @@ ALWAYS_INLINE_RELEASE static bool BreakpointCheck()
}
template<PGXPMode pgxp_mode, bool debug>
static void ExecuteImpl()
[[noreturn]] static void ExecuteImpl()
{
g_using_interpreter = true;
g_state.frame_done = false;
while (!g_state.frame_done)
for (;;)
{
TimingEvents::UpdateCPUDowncount();
TimingEvents::RunEvents();
while (g_state.pending_ticks < g_state.downcount)
{
if (HasPendingInterrupt() && !g_state.interrupt_delay)
DispatchInterrupt();
if constexpr (debug)
{
Cop0ExecutionBreakpointCheck();
@ -2028,12 +2035,11 @@ static void ExecuteImpl()
}
}
g_state.interrupt_delay = false;
g_state.pending_ticks++;
// now executing the instruction we previously fetched
g_state.current_instruction.bits = g_state.next_instruction.bits;
g_state.current_instruction_pc = g_state.regs.pc;
g_state.current_instruction_pc = g_state.pc;
g_state.current_instruction_in_branch_delay_slot = g_state.next_instruction_is_branch_delay_slot;
g_state.current_instruction_was_branch_taken = g_state.branch_was_taken;
g_state.next_instruction_is_branch_delay_slot = false;
@ -2065,27 +2071,10 @@ static void ExecuteImpl()
// next load delay
UpdateLoadDelay();
}
TimingEvents::RunEvents();
}
}
void Execute()
{
if (g_settings.gpu_pgxp_enable)
{
if (g_settings.gpu_pgxp_cpu)
ExecuteImpl<PGXPMode::CPU, false>();
else
ExecuteImpl<PGXPMode::Memory, false>();
}
else
{
ExecuteImpl<PGXPMode::Disabled, false>();
}
}
void ExecuteDebug()
static void ExecuteDebug()
{
if (g_settings.gpu_pgxp_enable)
{
@ -2100,11 +2089,56 @@ void ExecuteDebug()
}
}
void Execute()
{
const CPUExecutionMode exec_mode = g_settings.cpu_execution_mode;
const bool use_debug_dispatcher = g_state.use_debug_dispatcher;
if (fastjmp_set(&s_jmp_buf) != 0)
{
// Before we return, set npc to pc so that we can switch from recs to int.
if (exec_mode != CPUExecutionMode::Interpreter && !use_debug_dispatcher)
g_state.npc = g_state.pc;
return;
}
if (use_debug_dispatcher)
{
ExecuteDebug();
return;
}
switch (exec_mode)
{
case CPUExecutionMode::Recompiler:
case CPUExecutionMode::CachedInterpreter:
CodeCache::Execute();
break;
case CPUExecutionMode::Interpreter:
default:
{
if (g_settings.gpu_pgxp_enable)
{
if (g_settings.gpu_pgxp_cpu)
ExecuteImpl<PGXPMode::CPU, false>();
else
ExecuteImpl<PGXPMode::Memory, false>();
}
else
{
ExecuteImpl<PGXPMode::Disabled, false>();
}
}
break;
}
}
void SingleStep()
{
s_single_step = true;
ExecuteDebug();
Host::ReportFormattedDebuggerMessage("Stepped to 0x%08X.", g_state.regs.pc);
if (fastjmp_set(&s_jmp_buf) == 0)
ExecuteDebug();
Host::ReportFormattedDebuggerMessage("Stepped to 0x%08X.", g_state.pc);
}
namespace CodeCache {
@ -2113,8 +2147,8 @@ template<PGXPMode pgxp_mode>
void InterpretCachedBlock(const CodeBlock& block)
{
// set up the state so we've already fetched the instruction
DebugAssert(g_state.regs.pc == block.GetPC());
g_state.regs.npc = block.GetPC() + 4;
DebugAssert(g_state.pc == block.GetPC());
g_state.npc = block.GetPC() + 4;
for (const CodeBlockInstruction& cbi : block.instructions)
{
@ -2129,8 +2163,8 @@ void InterpretCachedBlock(const CodeBlock& block)
g_state.exception_raised = false;
// update pc
g_state.regs.pc = g_state.regs.npc;
g_state.regs.npc += 4;
g_state.pc = g_state.npc;
g_state.npc += 4;
// execute the instruction we previously fetched
ExecuteInstruction<pgxp_mode, false>();
@ -2153,7 +2187,7 @@ template void InterpretCachedBlock<PGXPMode::CPU>(const CodeBlock& block);
template<PGXPMode pgxp_mode>
void InterpretUncachedBlock()
{
g_state.regs.npc = g_state.regs.pc;
g_state.npc = g_state.pc;
if (!FetchInstructionForInterpreterFallback())
return;
@ -2166,7 +2200,7 @@ void InterpretUncachedBlock()
// now executing the instruction we previously fetched
g_state.current_instruction.bits = g_state.next_instruction.bits;
g_state.current_instruction_pc = g_state.regs.pc;
g_state.current_instruction_pc = g_state.pc;
g_state.current_instruction_in_branch_delay_slot = g_state.next_instruction_is_branch_delay_slot;
g_state.current_instruction_was_branch_taken = g_state.branch_was_taken;
g_state.next_instruction_is_branch_delay_slot = false;
@ -2182,7 +2216,7 @@ void InterpretUncachedBlock()
}
else
{
g_state.regs.pc = g_state.regs.npc;
g_state.pc = g_state.npc;
}
// execute the instruction we previously fetched