CPU/CodeCache: Always dynamically allocate code buffer
Reduces .bss size.
This commit is contained in:
@@ -123,22 +123,27 @@ PerfScope MIPSPerfScope("MIPS");
|
||||
|
||||
#endif
|
||||
|
||||
// Currently remapping the code buffer doesn't work in macOS. TODO: Make dynamic instead...
|
||||
#ifndef __APPLE__
|
||||
#define USE_STATIC_CODE_BUFFER 1
|
||||
#endif
|
||||
|
||||
#if defined(CPU_ARCH_ARM32)
|
||||
// Use a smaller code buffer size on AArch32 to have a better chance of being in range.
|
||||
static constexpr u32 RECOMPILER_CODE_CACHE_SIZE = 16 * 1024 * 1024;
|
||||
static constexpr u32 RECOMPILER_FAR_CODE_CACHE_SIZE = 8 * 1024 * 1024;
|
||||
static constexpr u32 RECOMPILER_CODE_CACHE_SIZE = 20 * 1024 * 1024;
|
||||
static constexpr u32 RECOMPILER_FAR_CODE_CACHE_SIZE = 4 * 1024 * 1024;
|
||||
#else
|
||||
static constexpr u32 RECOMPILER_CODE_CACHE_SIZE = 32 * 1024 * 1024;
|
||||
static constexpr u32 RECOMPILER_CODE_CACHE_SIZE = 48 * 1024 * 1024;
|
||||
static constexpr u32 RECOMPILER_FAR_CODE_CACHE_SIZE = 16 * 1024 * 1024;
|
||||
#endif
|
||||
|
||||
#ifdef USE_STATIC_CODE_BUFFER
|
||||
alignas(HOST_PAGE_SIZE) static u8 s_code_storage[RECOMPILER_CODE_CACHE_SIZE + RECOMPILER_FAR_CODE_CACHE_SIZE];
|
||||
// On Linux ARM32/ARM64, we use a dedicated section in the ELF for storing code.
|
||||
// This is because without ASLR, or on certain ASLR offsets, the sbrk() heap ends up immediately following the text/data
|
||||
// sections, which means there isn't a large enough gap to fit within range on ARM32.
|
||||
#if defined(__linux__) && (defined(CPU_ARCH_ARM32) || defined(CPU_ARCH_ARM64))
|
||||
#define USE_CODE_BUFFER_SECTION 1
|
||||
#ifdef __clang__
|
||||
#pragma clang section bss = ".jitstorage"
|
||||
__attribute__((aligned(HOST_PAGE_SIZE))) static u8 s_code_buffer_ptr[RECOMPILER_CODE_CACHE_SIZE];
|
||||
#pragma clang section bss = ""
|
||||
#endif
|
||||
#else
|
||||
static u8* s_code_buffer_ptr = nullptr;
|
||||
#endif
|
||||
|
||||
static JitCodeBuffer s_code_buffer;
|
||||
@@ -162,20 +167,26 @@ bool CPU::CodeCache::IsUsingFastmem()
|
||||
|
||||
bool CPU::CodeCache::ProcessStartup(Error* error)
|
||||
{
|
||||
AllocateLUTs();
|
||||
|
||||
#ifdef USE_STATIC_CODE_BUFFER
|
||||
const bool has_buffer =
|
||||
s_code_buffer.Initialize(s_code_storage, sizeof(s_code_storage), RECOMPILER_FAR_CODE_CACHE_SIZE, HOST_PAGE_SIZE);
|
||||
#ifdef USE_CODE_BUFFER_SECTION
|
||||
const u8* module_base = static_cast<const u8*>(MemMap::GetBaseAddress());
|
||||
INFO_LOG("Using JIT buffer section of size {} at {} (0x{:X} bytes / {} MB away)", sizeof(s_code_buffer_ptr),
|
||||
static_cast<void*>(s_code_buffer_ptr), std::abs(static_cast<ptrdiff_t>(s_code_buffer_ptr - module_base)),
|
||||
(std::abs(static_cast<ptrdiff_t>(s_code_buffer_ptr - module_base)) + (1024 * 1024 - 1)) / (1024 * 1024));
|
||||
const bool code_buffer_allocated =
|
||||
MemMap::MemProtect(s_code_buffer_ptr, RECOMPILER_CODE_CACHE_SIZE, PageProtect::ReadWriteExecute);
|
||||
#else
|
||||
const bool has_buffer = false;
|
||||
s_code_buffer_ptr = static_cast<u8*>(MemMap::AllocateJITMemory(RECOMPILER_CODE_CACHE_SIZE));
|
||||
const bool code_buffer_allocated = (s_code_buffer_ptr != nullptr);
|
||||
#endif
|
||||
if (!has_buffer && !s_code_buffer.Allocate(RECOMPILER_CODE_CACHE_SIZE, RECOMPILER_FAR_CODE_CACHE_SIZE))
|
||||
if (!code_buffer_allocated) [[unlikely]]
|
||||
{
|
||||
Error::SetStringView(error, "Failed to initialize code space");
|
||||
Error::SetStringView(error, "Failed to allocate code storage. The log may contain more information, you will need "
|
||||
"to run DuckStation with -earlyconsole in the command line.");
|
||||
return false;
|
||||
}
|
||||
|
||||
AllocateLUTs();
|
||||
|
||||
if (!PageFaultHandler::Install(error))
|
||||
return false;
|
||||
|
||||
@@ -184,17 +195,21 @@ bool CPU::CodeCache::ProcessStartup(Error* error)
|
||||
|
||||
// Process-wide teardown for the code cache.
// As listed here it destroys s_code_buffer, calls DeallocateLUTs(), and - unless USE_CODE_BUFFER_SECTION is
// defined - passes s_code_buffer_ptr and RECOMPILER_CODE_CACHE_SIZE to MemMap::ReleaseJITMemory().
// NOTE(review): this listing is a rendered commit diff with the +/- markers stripped, so removed and added lines
// appear side by side. The s_code_buffer.Destroy() call is presumably the pre-change line and the #ifndef block
// its replacement - confirm against the real file.
void CPU::CodeCache::ProcessShutdown()
|
||||
{
|
||||
s_code_buffer.Destroy();
|
||||
DeallocateLUTs();
|
||||
|
||||
// Only compiled when USE_CODE_BUFFER_SECTION is not defined.
#ifndef USE_CODE_BUFFER_SECTION
|
||||
MemMap::ReleaseJITMemory(s_code_buffer_ptr, RECOMPILER_CODE_CACHE_SIZE);
|
||||
#endif
|
||||
}
|
||||
|
||||
void CPU::CodeCache::Initialize()
|
||||
{
|
||||
Assert(s_blocks.empty());
|
||||
|
||||
// TODO: Reduce far code size when not using memory exceptions.
|
||||
if (IsUsingAnyRecompiler())
|
||||
{
|
||||
s_code_buffer.Reset();
|
||||
s_code_buffer.Reset(s_code_buffer_ptr, RECOMPILER_CODE_CACHE_SIZE, RECOMPILER_FAR_CODE_CACHE_SIZE);
|
||||
CompileASMFunctions();
|
||||
ResetCodeLUT();
|
||||
}
|
||||
@@ -219,7 +234,7 @@ void CPU::CodeCache::Reset()
|
||||
if (IsUsingAnyRecompiler())
|
||||
{
|
||||
ClearASMFunctions();
|
||||
s_code_buffer.Reset();
|
||||
s_code_buffer.Reset(s_code_buffer_ptr, RECOMPILER_CODE_CACHE_SIZE, RECOMPILER_FAR_CODE_CACHE_SIZE);
|
||||
CompileASMFunctions();
|
||||
ResetCodeLUT();
|
||||
}
|
||||
|
||||
@@ -1,9 +1,10 @@
|
||||
// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin <stenzek@gmail.com>
|
||||
// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin <stenzek@gmail.com>
|
||||
// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)
|
||||
|
||||
#include "common/align.h"
|
||||
#include "common/assert.h"
|
||||
#include "common/log.h"
|
||||
#include "common/memmap.h"
|
||||
|
||||
#include "cpu_code_cache_private.h"
|
||||
#include "cpu_core.h"
|
||||
@@ -171,7 +172,7 @@ u32 CPU::CodeCache::EmitJump(void* code, const void* dst, bool flush_icache)
|
||||
}
|
||||
|
||||
if (flush_icache)
|
||||
JitCodeBuffer::FlushInstructionCache(code, kA32InstructionSizeInBytes);
|
||||
MemMap::FlushInstructionCache(code, kA32InstructionSizeInBytes);
|
||||
|
||||
return kA32InstructionSizeInBytes;
|
||||
}
|
||||
@@ -202,7 +203,7 @@ u8* CPU::Recompiler::armGetJumpTrampoline(const void* target)
|
||||
s_trampoline_targets.emplace(target, offset);
|
||||
s_trampoline_used = offset + static_cast<u32>(size);
|
||||
|
||||
JitCodeBuffer::FlushInstructionCache(start, size);
|
||||
MemMap::FlushInstructionCache(start, size);
|
||||
return start;
|
||||
}
|
||||
|
||||
@@ -1790,7 +1791,7 @@ void CodeGenerator::BackpatchLoadStore(void* host_pc, const CodeCache::Loadstore
|
||||
for (s32 i = 0; i < nops; i++)
|
||||
emit.nop();
|
||||
|
||||
JitCodeBuffer::FlushInstructionCache(host_pc, lbi.code_size);
|
||||
MemMap::FlushInstructionCache(host_pc, lbi.code_size);
|
||||
}
|
||||
|
||||
void CodeGenerator::EmitLoadGlobal(HostReg host_reg, RegSize size, const void* ptr)
|
||||
|
||||
@@ -1,9 +1,11 @@
|
||||
// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin <stenzek@gmail.com>
|
||||
// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin <stenzek@gmail.com>
|
||||
// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)
|
||||
|
||||
#include "common/align.h"
|
||||
#include "common/assert.h"
|
||||
#include "common/log.h"
|
||||
#include "common/memmap.h"
|
||||
|
||||
#include "cpu_code_cache_private.h"
|
||||
#include "cpu_core.h"
|
||||
#include "cpu_core_private.h"
|
||||
@@ -274,7 +276,7 @@ u8* CPU::Recompiler::armGetJumpTrampoline(const void* target)
|
||||
s_trampoline_targets.emplace(target, offset);
|
||||
s_trampoline_used = offset + static_cast<u32>(size);
|
||||
|
||||
JitCodeBuffer::FlushInstructionCache(start, size);
|
||||
MemMap::FlushInstructionCache(start, size);
|
||||
return start;
|
||||
}
|
||||
|
||||
@@ -316,7 +318,7 @@ u32 CPU::CodeCache::EmitJump(void* code, const void* dst, bool flush_icache)
|
||||
const u32 new_code = B | Assembler::ImmUncondBranch(disp);
|
||||
std::memcpy(code, &new_code, sizeof(new_code));
|
||||
if (flush_icache)
|
||||
JitCodeBuffer::FlushInstructionCache(code, kInstructionSize);
|
||||
MemMap::FlushInstructionCache(code, kInstructionSize);
|
||||
|
||||
return kInstructionSize;
|
||||
}
|
||||
@@ -2100,7 +2102,7 @@ void CodeGenerator::BackpatchLoadStore(void* host_pc, const CodeCache::Loadstore
|
||||
for (s32 i = 0; i < nops; i++)
|
||||
emit.nop();
|
||||
|
||||
JitCodeBuffer::FlushInstructionCache(host_pc, lbi.code_size);
|
||||
MemMap::FlushInstructionCache(host_pc, lbi.code_size);
|
||||
}
|
||||
|
||||
void CodeGenerator::EmitLoadGlobal(HostReg host_reg, RegSize size, const void* ptr)
|
||||
|
||||
@@ -12,6 +12,7 @@
|
||||
#include "common/align.h"
|
||||
#include "common/assert.h"
|
||||
#include "common/log.h"
|
||||
#include "common/memmap.h"
|
||||
|
||||
#ifdef CPU_ARCH_X64
|
||||
|
||||
@@ -1768,15 +1769,8 @@ void CodeGenerator::RestoreStackAfterCall(u32 adjust_size)
|
||||
|
||||
// Emits a call to the address `ptr` through m_emit.
// NOTE(review): this listing is a rendered commit diff with the +/- markers stripped, so two versions of the body
// appear back to back. Presumably the if/else is the removed version and the DebugAssert followed by the
// unconditional call(ptr) is its replacement - confirm against the real file.
void CodeGenerator::EmitCall(const void* ptr)
|
||||
{
|
||||
// Direct call when the distance from the current emit position to ptr passes Xbyak::inner::IsInInt32...
if (Xbyak::inner::IsInInt32(reinterpret_cast<size_t>(ptr) - reinterpret_cast<size_t>(m_emit->getCurr())))
|
||||
{
|
||||
m_emit->call(ptr);
|
||||
}
|
||||
// ...otherwise load the address into the RRETURN host register and call through it.
else
|
||||
{
|
||||
m_emit->mov(GetHostReg64(RRETURN), reinterpret_cast<size_t>(ptr));
|
||||
m_emit->call(GetHostReg64(RRETURN));
|
||||
}
|
||||
// This variant has no register fallback: it asserts the same range condition and always calls ptr directly.
DebugAssert(Xbyak::inner::IsInInt32(reinterpret_cast<size_t>(ptr) - reinterpret_cast<size_t>(m_emit->getCurr())));
|
||||
m_emit->call(ptr);
|
||||
}
|
||||
|
||||
void CodeGenerator::EmitFunctionCallPtr(Value* return_value, const void* ptr)
|
||||
@@ -2530,7 +2524,7 @@ void CodeGenerator::BackpatchLoadStore(void* host_pc, const CodeCache::Loadstore
|
||||
for (s32 i = 0; i < nops; i++)
|
||||
cg.nop();
|
||||
|
||||
JitCodeBuffer::FlushInstructionCache(host_pc, lbi.code_size);
|
||||
MemMap::FlushInstructionCache(host_pc, lbi.code_size);
|
||||
}
|
||||
|
||||
void CodeGenerator::EmitLoadGlobal(HostReg host_reg, RegSize size, const void* ptr)
|
||||
|
||||
@@ -318,6 +318,34 @@ void System::CheckCacheLineSize()
|
||||
}
|
||||
}
|
||||
|
||||
// Process-level startup: sets up the code cache first, then bus memory.
// `error` is passed through to CPU::CodeCache::ProcessStartup() and Bus::AllocateMemory().
// Returns false if either step fails; when the bus allocation fails, the code cache is shut down again before
// returning. On success, logs the elapsed time, runs CheckCacheLineSize() and returns true.
bool System::Internal::ProcessStartup(Error* error)
|
||||
{
|
||||
// Measures how long the allocations below take; read by the VERBOSE_LOG call further down.
Common::Timer timer;
|
||||
|
||||
// Allocate JIT memory as soon as possible.
|
||||
if (!CPU::CodeCache::ProcessStartup(error))
|
||||
return false;
|
||||
|
||||
// Fastmem alloc *must* come after JIT alloc, otherwise it tends to eat the 4GB region after the executable on macOS.
|
||||
if (!Bus::AllocateMemory(error))
|
||||
{
|
||||
// Undo the code cache startup that already succeeded above.
CPU::CodeCache::ProcessShutdown();
|
||||
return false;
|
||||
}
|
||||
|
||||
VERBOSE_LOG("Memory allocation took {} ms.", timer.GetTimeMilliseconds());
|
||||
|
||||
CheckCacheLineSize();
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Process-level teardown: releases bus memory first, then shuts down the code cache.
// This is the reverse of the order in which System::Internal::ProcessStartup() sets the two up.
void System::Internal::ProcessShutdown()
|
||||
{
|
||||
Bus::ReleaseMemory();
|
||||
CPU::CodeCache::ProcessShutdown();
|
||||
}
|
||||
|
||||
bool System::Internal::CPUThreadInitialize(Error* error)
|
||||
{
|
||||
#ifdef _WIN32
|
||||
@@ -332,17 +360,9 @@ bool System::Internal::CPUThreadInitialize(Error* error)
|
||||
}
|
||||
#endif
|
||||
|
||||
if (!CPU::CodeCache::ProcessStartup(error) || !Bus::AllocateMemory(error))
|
||||
{
|
||||
CPUThreadShutdown();
|
||||
return false;
|
||||
}
|
||||
|
||||
// This will call back to Host::LoadSettings() -> ReloadSources().
|
||||
LoadSettings(false);
|
||||
|
||||
CheckCacheLineSize();
|
||||
|
||||
#ifdef ENABLE_RAINTEGRATION
|
||||
if (Host::GetBaseBoolSettingValue("Cheevos", "UseRAIntegration", false))
|
||||
Achievements::SwitchToRAIntegration();
|
||||
@@ -377,9 +397,6 @@ void System::Internal::CPUThreadShutdown()
|
||||
|
||||
InputManager::CloseSources();
|
||||
|
||||
CPU::CodeCache::ProcessShutdown();
|
||||
Bus::ReleaseMemory();
|
||||
|
||||
#ifdef _WIN32
|
||||
CoUninitialize();
|
||||
#endif
|
||||
|
||||
@@ -504,10 +504,16 @@ namespace Internal {
|
||||
/// Performs mandatory hardware checks.
|
||||
bool PerformEarlyHardwareChecks(Error* error);
|
||||
|
||||
/// Called on process startup.
|
||||
bool CPUThreadInitialize(Error* error);
|
||||
/// Called on process startup, as early as possible.
|
||||
bool ProcessStartup(Error* error);
|
||||
|
||||
/// Called on process shutdown.
|
||||
void ProcessShutdown();
|
||||
|
||||
/// Called on CPU thread initialization.
|
||||
bool CPUThreadInitialize(Error* error);
|
||||
|
||||
/// Called on CPU thread shutdown.
|
||||
void CPUThreadShutdown();
|
||||
|
||||
/// Polls input, updates subsystems which are present while paused/inactive.
|
||||
|
||||
Reference in New Issue
Block a user