GPUDevice: Purge threaded presentation

Threaded presentation causes worse frame pacing, and the GPU thread (when I
finish it) will give significantly faster performance on mobile anyway.
This commit is contained in:
Stenzek
2024-09-07 12:15:42 +10:00
parent c5dd48474f
commit 1c1b82ed66
20 changed files with 65 additions and 235 deletions

View File

@ -63,9 +63,8 @@ bool D3D11Device::HasSurface() const
return static_cast<bool>(m_swap_chain);
}
bool D3D11Device::CreateDevice(std::string_view adapter, bool threaded_presentation,
std::optional<bool> exclusive_fullscreen_control, FeatureMask disabled_features,
Error* error)
bool D3D11Device::CreateDevice(std::string_view adapter, std::optional<bool> exclusive_fullscreen_control,
FeatureMask disabled_features, Error* error)
{
std::unique_lock lock(s_instance_mutex);

View File

@ -112,9 +112,8 @@ public:
void UnbindTexture(D3D11Texture* tex);
protected:
bool CreateDevice(std::string_view adapter, bool threaded_presentation,
std::optional<bool> exclusive_fullscreen_control, FeatureMask disabled_features,
Error* error) override;
bool CreateDevice(std::string_view adapter, std::optional<bool> exclusive_fullscreen_control,
FeatureMask disabled_features, Error* error) override;
void DestroyDevice() override;
private:

View File

@ -117,9 +117,8 @@ D3D12Device::ComPtr<ID3D12RootSignature> D3D12Device::CreateRootSignature(const
return rs;
}
bool D3D12Device::CreateDevice(std::string_view adapter, bool threaded_presentation,
std::optional<bool> exclusive_fullscreen_control, FeatureMask disabled_features,
Error* error)
bool D3D12Device::CreateDevice(std::string_view adapter, std::optional<bool> exclusive_fullscreen_control,
FeatureMask disabled_features, Error* error)
{
std::unique_lock lock(s_instance_mutex);

View File

@ -185,9 +185,8 @@ public:
void UnbindTextureBuffer(D3D12TextureBuffer* buf);
protected:
bool CreateDevice(std::string_view adapter, bool threaded_presentation,
std::optional<bool> exclusive_fullscreen_control, FeatureMask disabled_features,
Error* error) override;
bool CreateDevice(std::string_view adapter, std::optional<bool> exclusive_fullscreen_control,
FeatureMask disabled_features, Error* error) override;
void DestroyDevice() override;
bool ReadPipelineCache(std::optional<DynamicHeapArray<u8>> data) override;

View File

@ -347,7 +347,7 @@ GPUDevice::AdapterInfoList GPUDevice::GetAdapterListForAPI(RenderAPI api)
}
bool GPUDevice::Create(std::string_view adapter, std::string_view shader_cache_path, u32 shader_cache_version,
bool debug_device, GPUVSyncMode vsync, bool allow_present_throttle, bool threaded_presentation,
bool debug_device, GPUVSyncMode vsync, bool allow_present_throttle,
std::optional<bool> exclusive_fullscreen_control, FeatureMask disabled_features, Error* error)
{
m_vsync_mode = vsync;
@ -360,7 +360,7 @@ bool GPUDevice::Create(std::string_view adapter, std::string_view shader_cache_p
return false;
}
if (!CreateDevice(adapter, threaded_presentation, exclusive_fullscreen_control, disabled_features, error))
if (!CreateDevice(adapter, exclusive_fullscreen_control, disabled_features, error))
{
if (error && !error->IsValid())
error->SetStringView("Failed to create device.");

View File

@ -604,8 +604,8 @@ public:
virtual RenderAPI GetRenderAPI() const = 0;
bool Create(std::string_view adapter, std::string_view shader_cache_path, u32 shader_cache_version, bool debug_device,
GPUVSyncMode vsync, bool allow_present_throttle, bool threaded_presentation,
std::optional<bool> exclusive_fullscreen_control, FeatureMask disabled_features, Error* error);
GPUVSyncMode vsync, bool allow_present_throttle, std::optional<bool> exclusive_fullscreen_control,
FeatureMask disabled_features, Error* error);
void Destroy();
virtual bool HasSurface() const = 0;
@ -737,9 +737,8 @@ public:
static void ResetStatistics();
protected:
virtual bool CreateDevice(std::string_view adapter, bool threaded_presentation,
std::optional<bool> exclusive_fullscreen_control, FeatureMask disabled_features,
Error* error) = 0;
virtual bool CreateDevice(std::string_view adapter, std::optional<bool> exclusive_fullscreen_control,
FeatureMask disabled_features, Error* error) = 0;
virtual void DestroyDevice() = 0;
std::string GetShaderCacheBaseName(std::string_view type) const;

View File

@ -287,9 +287,8 @@ public:
static void DeferRelease(u64 fence_counter, id obj);
protected:
bool CreateDevice(std::string_view adapter, bool threaded_presentation,
std::optional<bool> exclusive_fullscreen_control, FeatureMask disabled_features,
Error* error) override;
bool CreateDevice(std::string_view adapter, std::optional<bool> exclusive_fullscreen_control,
FeatureMask disabled_features, Error* error) override;
void DestroyDevice() override;
private:

View File

@ -161,9 +161,8 @@ void MetalDevice::SetVSyncMode(GPUVSyncMode mode, bool allow_present_throttle)
[m_layer setDisplaySyncEnabled:m_vsync_mode == GPUVSyncMode::FIFO];
}
bool MetalDevice::CreateDevice(std::string_view adapter, bool threaded_presentation,
std::optional<bool> exclusive_fullscreen_control, FeatureMask disabled_features,
Error* error)
bool MetalDevice::CreateDevice(std::string_view adapter, std::optional<bool> exclusive_fullscreen_control,
FeatureMask disabled_features, Error* error)
{
@autoreleasepool
{

View File

@ -281,9 +281,8 @@ bool OpenGLDevice::HasSurface() const
return m_window_info.type != WindowInfo::Type::Surfaceless;
}
bool OpenGLDevice::CreateDevice(std::string_view adapter, bool threaded_presentation,
std::optional<bool> exclusive_fullscreen_control, FeatureMask disabled_features,
Error* error)
bool OpenGLDevice::CreateDevice(std::string_view adapter, std::optional<bool> exclusive_fullscreen_control,
FeatureMask disabled_features, Error* error)
{
m_gl_context = OpenGLContext::Create(m_window_info, error);
if (!m_gl_context)

View File

@ -115,12 +115,14 @@ public:
void CommitRTClearInFB(OpenGLTexture* tex, u32 idx);
void CommitDSClearInFB(OpenGLTexture* tex);
GLuint LookupProgramCache(const OpenGLPipeline::ProgramCacheKey& key, const GPUPipeline::GraphicsConfig& plconfig, Error* error);
GLuint LookupProgramCache(const OpenGLPipeline::ProgramCacheKey& key, const GPUPipeline::GraphicsConfig& plconfig,
Error* error);
GLuint CompileProgram(const GPUPipeline::GraphicsConfig& plconfig, Error* error);
void PostLinkProgram(const GPUPipeline::GraphicsConfig& plconfig, GLuint program_id);
void UnrefProgram(const OpenGLPipeline::ProgramCacheKey& key);
OpenGLPipeline::VertexArrayCache::const_iterator LookupVAOCache(const OpenGLPipeline::VertexArrayCacheKey& key, Error* error);
OpenGLPipeline::VertexArrayCache::const_iterator LookupVAOCache(const OpenGLPipeline::VertexArrayCacheKey& key,
Error* error);
GLuint CreateVAO(std::span<const GPUPipeline::VertexAttribute> attributes, u32 stride, Error* error);
void UnrefVAO(const OpenGLPipeline::VertexArrayCacheKey& key);
@ -132,9 +134,8 @@ public:
void UnbindPipeline(const OpenGLPipeline* pl);
protected:
bool CreateDevice(std::string_view adapter, bool threaded_presentation,
std::optional<bool> exclusive_fullscreen_control, FeatureMask disabled_features,
Error* error) override;
bool CreateDevice(std::string_view adapter, std::optional<bool> exclusive_fullscreen_control,
FeatureMask disabled_features, Error* error) override;
void DestroyDevice() override;
bool OpenPipelineCache(const std::string& filename) override;

View File

@ -1268,7 +1268,6 @@ void VulkanDevice::WaitForFenceCounter(u64 fence_counter)
void VulkanDevice::WaitForGPUIdle()
{
WaitForPresentComplete();
vkDeviceWaitIdle(m_device);
}
@ -1287,13 +1286,6 @@ bool VulkanDevice::SetGPUTimingEnabled(bool enabled)
void VulkanDevice::WaitForCommandBufferCompletion(u32 index)
{
// We might be waiting for the buffer we just submitted to the worker thread.
if (m_queued_present.command_buffer_index == index && !m_present_done.load(std::memory_order_acquire))
{
WARNING_LOG("Waiting for threaded submission of cmdbuffer {}", index);
WaitForPresentComplete();
}
// Wait for this command buffer to be completed.
static constexpr u32 MAX_TIMEOUTS = 10;
u32 timeouts = 0;
@ -1311,7 +1303,7 @@ void VulkanDevice::WaitForCommandBufferCompletion(u32 index)
else if (res != VK_SUCCESS)
{
LOG_VULKAN_ERROR(res, TinyString::from_format("vkWaitForFences() for cmdbuffer {} failed: ", index));
m_last_submit_failed.store(true, std::memory_order_release);
m_device_is_lost = true;
return;
}
}
@ -1363,10 +1355,9 @@ void VulkanDevice::WaitForCommandBufferCompletion(u32 index)
}
}
void VulkanDevice::EndAndSubmitCommandBuffer(VulkanSwapChain* present_swap_chain, bool explicit_present,
bool submit_on_thread)
void VulkanDevice::EndAndSubmitCommandBuffer(VulkanSwapChain* present_swap_chain, bool explicit_present)
{
if (m_last_submit_failed.load(std::memory_order_acquire))
if (m_device_is_lost)
return;
CommandBuffer& resources = m_frame_resources[m_current_frame];
@ -1399,27 +1390,6 @@ void VulkanDevice::EndAndSubmitCommandBuffer(VulkanSwapChain* present_swap_chain
// This command buffer now has commands, so can't be re-used without waiting.
resources.needs_fence_wait = true;
std::unique_lock<std::mutex> lock(m_present_mutex);
WaitForPresentComplete(lock);
if (!submit_on_thread || explicit_present || !m_present_thread.joinable())
{
DoSubmitCommandBuffer(m_current_frame, present_swap_chain);
if (present_swap_chain && !explicit_present)
DoPresent(present_swap_chain);
return;
}
m_queued_present.command_buffer_index = m_current_frame;
m_queued_present.swap_chain = present_swap_chain;
m_present_done.store(false, std::memory_order_release);
m_present_queued_cv.notify_one();
}
void VulkanDevice::DoSubmitCommandBuffer(u32 index, VulkanSwapChain* present_swap_chain)
{
CommandBuffer& resources = m_frame_resources[index];
uint32_t wait_bits = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
VkSubmitInfo submit_info = {VK_STRUCTURE_TYPE_SUBMIT_INFO,
nullptr,
@ -1442,16 +1412,19 @@ void VulkanDevice::DoSubmitCommandBuffer(u32 index, VulkanSwapChain* present_swa
submit_info.signalSemaphoreCount = 1;
}
const VkResult res = vkQueueSubmit(m_graphics_queue, 1, &submit_info, resources.fence);
res = vkQueueSubmit(m_graphics_queue, 1, &submit_info, resources.fence);
if (res != VK_SUCCESS)
{
LOG_VULKAN_ERROR(res, "vkQueueSubmit failed: ");
m_last_submit_failed.store(true, std::memory_order_release);
m_device_is_lost = true;
return;
}
if (present_swap_chain && !explicit_present)
QueuePresent(present_swap_chain);
}
void VulkanDevice::DoPresent(VulkanSwapChain* present_swap_chain)
void VulkanDevice::QueuePresent(VulkanSwapChain* present_swap_chain)
{
const VkPresentInfoKHR present_info = {VK_STRUCTURE_TYPE_PRESENT_INFO_KHR,
nullptr,
@ -1482,65 +1455,6 @@ void VulkanDevice::DoPresent(VulkanSwapChain* present_swap_chain)
present_swap_chain->AcquireNextImage();
}
void VulkanDevice::WaitForPresentComplete()
{
if (m_present_done.load(std::memory_order_acquire))
return;
std::unique_lock<std::mutex> lock(m_present_mutex);
WaitForPresentComplete(lock);
}
void VulkanDevice::WaitForPresentComplete(std::unique_lock<std::mutex>& lock)
{
if (m_present_done.load(std::memory_order_acquire))
return;
m_present_done_cv.wait(lock, [this]() { return m_present_done.load(std::memory_order_acquire); });
}
void VulkanDevice::PresentThread()
{
std::unique_lock<std::mutex> lock(m_present_mutex);
while (!m_present_thread_done.load(std::memory_order_acquire))
{
m_present_queued_cv.wait(lock, [this]() {
return !m_present_done.load(std::memory_order_acquire) || m_present_thread_done.load(std::memory_order_acquire);
});
if (m_present_done.load(std::memory_order_acquire))
continue;
DoSubmitCommandBuffer(m_queued_present.command_buffer_index, m_queued_present.swap_chain);
if (m_queued_present.swap_chain)
DoPresent(m_queued_present.swap_chain);
m_present_done.store(true, std::memory_order_release);
m_present_done_cv.notify_one();
}
}
void VulkanDevice::StartPresentThread()
{
DebugAssert(!m_present_thread.joinable());
m_present_thread_done.store(false, std::memory_order_release);
m_present_thread = std::thread(&VulkanDevice::PresentThread, this);
}
void VulkanDevice::StopPresentThread()
{
if (!m_present_thread.joinable())
return;
{
std::unique_lock<std::mutex> lock(m_present_mutex);
WaitForPresentComplete(lock);
m_present_thread_done.store(true, std::memory_order_release);
m_present_queued_cv.notify_one();
}
m_present_thread.join();
}
void VulkanDevice::MoveToNextCommandBuffer()
{
BeginCommandBuffer((m_current_frame + 1) % NUM_COMMAND_BUFFERS);
@ -1602,7 +1516,7 @@ void VulkanDevice::SubmitCommandBuffer(bool wait_for_completion)
DebugAssert(!InRenderPass());
const u32 current_frame = m_current_frame;
EndAndSubmitCommandBuffer(nullptr, false, false);
EndAndSubmitCommandBuffer(nullptr, false);
MoveToNextCommandBuffer();
if (wait_for_completion)
@ -1629,11 +1543,6 @@ void VulkanDevice::SubmitCommandBufferAndRestartRenderPass(const std::string_vie
BeginRenderPass();
}
bool VulkanDevice::CheckLastSubmitFail()
{
return m_last_submit_failed.load(std::memory_order_acquire);
}
void VulkanDevice::DeferBufferDestruction(VkBuffer object, VmaAllocation allocation)
{
m_cleanup_objects.emplace_back(GetCurrentFenceCounter(),
@ -1987,9 +1896,8 @@ bool VulkanDevice::HasSurface() const
return static_cast<bool>(m_swap_chain);
}
bool VulkanDevice::CreateDevice(std::string_view adapter, bool threaded_presentation,
std::optional<bool> exclusive_fullscreen_control, FeatureMask disabled_features,
Error* error)
bool VulkanDevice::CreateDevice(std::string_view adapter, std::optional<bool> exclusive_fullscreen_control,
FeatureMask disabled_features, Error* error)
{
std::unique_lock lock(s_instance_mutex);
bool enable_debug_utils = m_debug_device;
@ -2097,9 +2005,6 @@ bool VulkanDevice::CreateDevice(std::string_view adapter, bool threaded_presenta
if (!CreateAllocator() || !CreatePersistentDescriptorPool() || !CreateCommandBuffers() || !CreatePipelineLayouts())
return false;
if (threaded_presentation)
StartPresentThread();
m_exclusive_fullscreen_control = exclusive_fullscreen_control;
if (surface != VK_NULL_HANDLE)
@ -2148,7 +2053,6 @@ void VulkanDevice::DestroyDevice()
if (m_device != VK_NULL_HANDLE)
WaitForGPUIdle();
StopPresentThread();
m_swap_chain.reset();
if (m_null_texture)
@ -2451,11 +2355,8 @@ bool VulkanDevice::BeginPresent(bool frame_skip, u32 clear_color)
return false;
}
// Previous frame needs to be presented before we can acquire the swap chain.
WaitForPresentComplete();
// Check if the device was lost.
if (CheckLastSubmitFail())
if (m_device_is_lost)
{
Panic("Fixme"); // TODO
TrimTexturePool();
@ -2511,7 +2412,7 @@ void VulkanDevice::EndPresent(bool explicit_present)
VulkanTexture::TransitionSubresourcesToLayout(cmdbuf, m_swap_chain->GetCurrentImage(), GPUTexture::Type::RenderTarget,
0, 1, 0, 1, VulkanTexture::Layout::ColorAttachment,
VulkanTexture::Layout::PresentSrc);
EndAndSubmitCommandBuffer(m_swap_chain.get(), explicit_present, !m_swap_chain->IsPresentModeSynchronizing());
EndAndSubmitCommandBuffer(m_swap_chain.get(), explicit_present);
MoveToNextCommandBuffer();
InvalidateCachedState();
TrimTexturePool();
@ -2520,7 +2421,7 @@ void VulkanDevice::EndPresent(bool explicit_present)
void VulkanDevice::SubmitPresent()
{
DebugAssert(m_swap_chain);
DoPresent(m_swap_chain.get());
QueuePresent(m_swap_chain.get());
}
#ifdef _DEBUG
@ -3185,7 +3086,7 @@ void VulkanDevice::RenderBlankFrame()
VulkanTexture::TransitionSubresourcesToLayout(cmdbuf, image, GPUTexture::Type::RenderTarget, 0, 1, 0, 1,
VulkanTexture::Layout::TransferDst, VulkanTexture::Layout::PresentSrc);
EndAndSubmitCommandBuffer(m_swap_chain.get(), false, !m_swap_chain->IsPresentModeSynchronizing());
EndAndSubmitCommandBuffer(m_swap_chain.get(), false);
MoveToNextCommandBuffer();
InvalidateCachedState();

View File

@ -17,9 +17,7 @@
#include <deque>
#include <functional>
#include <memory>
#include <mutex>
#include <string>
#include <thread>
#include <unordered_map>
#include <vector>
@ -234,9 +232,8 @@ public:
void UnbindTextureBuffer(VulkanTextureBuffer* buf);
protected:
bool CreateDevice(std::string_view adapter, bool threaded_presentation,
std::optional<bool> exclusive_fullscreen_control, FeatureMask disabled_features,
Error* error) override;
bool CreateDevice(std::string_view adapter, std::optional<bool> exclusive_fullscreen_control,
FeatureMask disabled_features, Error* error) override;
void DestroyDevice() override;
bool ReadPipelineCache(std::optional<DynamicHeapArray<u8>> data) override;
@ -329,11 +326,6 @@ private:
bool IsDeviceImgTec() const;
bool IsBrokenMobileDriver() const;
void EndAndSubmitCommandBuffer(VulkanSwapChain* present_swap_chain, bool explicit_present, bool submit_on_thread);
void MoveToNextCommandBuffer();
void WaitForPresentComplete();
bool CheckLastSubmitFail();
using ExtensionList = std::vector<const char*>;
static bool SelectInstanceExtensions(ExtensionList* extension_list, const WindowInfo& wi, OptionalExtensions* oe,
bool enable_debug_utils);
@ -395,13 +387,9 @@ private:
void BeginCommandBuffer(u32 index);
void WaitForCommandBufferCompletion(u32 index);
void DoSubmitCommandBuffer(u32 index, VulkanSwapChain* present_swap_chain);
void DoPresent(VulkanSwapChain* present_swap_chain);
void WaitForPresentComplete(std::unique_lock<std::mutex>& lock);
void PresentThread();
void StartPresentThread();
void StopPresentThread();
void EndAndSubmitCommandBuffer(VulkanSwapChain* present_swap_chain, bool explicit_present);
void MoveToNextCommandBuffer();
void QueuePresent(VulkanSwapChain* present_swap_chain);
VkInstance m_instance = VK_NULL_HANDLE;
VkPhysicalDevice m_physical_device = VK_NULL_HANDLE;
@ -426,21 +414,7 @@ private:
u64 m_completed_fence_counter = 0;
u32 m_current_frame = 0;
std::atomic_bool m_last_submit_failed{false};
std::atomic_bool m_present_done{true};
std::mutex m_present_mutex;
std::condition_variable m_present_queued_cv;
std::condition_variable m_present_done_cv;
std::thread m_present_thread;
std::atomic_bool m_present_thread_done{false};
struct QueuedPresent
{
VulkanSwapChain* swap_chain;
u32 command_buffer_index;
};
QueuedPresent m_queued_present = {nullptr, 0xFFFFFFFFu};
bool m_device_is_lost = false;
std::unordered_map<RenderPassCacheKey, VkRenderPass, RenderPassCacheKeyHash> m_render_pass_cache;
GPUFramebufferManager<VkFramebuffer, CreateFramebuffer, DestroyFramebuffer> m_framebuffer_manager;

View File

@ -69,10 +69,6 @@ public:
return &m_semaphores[m_current_semaphore].rendering_finished_semaphore;
}
// Returns true if the current present mode is synchronizing (adaptive or hard).
ALWAYS_INLINE bool IsPresentModeSynchronizing() const { return (m_present_mode == VK_PRESENT_MODE_FIFO_KHR); }
ALWAYS_INLINE VkPresentModeKHR GetPresentMode() const { return m_present_mode; }
VkResult AcquireNextImage();
void ReleaseCurrentImage();
void ResetImageAcquireResult();