GPU: Move software fill/write/copy into rasterizer namespace
@@ -16,8 +16,6 @@ GPU_SW_Backend::~GPU_SW_Backend() = default;

bool GPU_SW_Backend::Initialize(bool force_thread)
{
  GPU_SW_Rasterizer::SelectImplementation();

  return GPUBackend::Initialize(force_thread);
}

@@ -59,186 +57,20 @@ void GPU_SW_Backend::DrawLine(const GPUBackendDrawLineCommand* cmd)

void GPU_SW_Backend::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color, GPUBackendCommandParameters params)
{
  const u16 color16 = VRAMRGBA8888ToRGBA5551(color);
  const GSVector4i fill = GSVector4i(color16, color16, color16, color16, color16, color16, color16, color16);
  constexpr u32 vector_width = 8;
  const u32 aligned_width = Common::AlignDownPow2(width, vector_width);

  if ((x + width) <= VRAM_WIDTH && !params.interlaced_rendering)
  {
    for (u32 yoffs = 0; yoffs < height; yoffs++)
    {
      const u32 row = (y + yoffs) % VRAM_HEIGHT;

      u16* row_ptr = &g_vram[row * VRAM_WIDTH + x];
      u32 xoffs = 0;
      for (; xoffs < aligned_width; xoffs += vector_width, row_ptr += vector_width)
        GSVector4i::store<false>(row_ptr, fill);
      for (; xoffs < width; xoffs++)
        *(row_ptr++) = color16;
    }
  }
  else if (params.interlaced_rendering)
  {
    // Hardware tests show that fills seem to break on the first two lines when the offset matches the displayed field.
    const u32 active_field = params.active_line_lsb;

    if ((x + width) <= VRAM_WIDTH)
    {
      for (u32 yoffs = 0; yoffs < height; yoffs++)
      {
        const u32 row = (y + yoffs) % VRAM_HEIGHT;
        if ((row & u32(1)) == active_field)
          continue;

        u16* row_ptr = &g_vram[row * VRAM_WIDTH + x];
        u32 xoffs = 0;
        for (; xoffs < aligned_width; xoffs += vector_width, row_ptr += vector_width)
          GSVector4i::store<false>(row_ptr, fill);
        for (; xoffs < width; xoffs++)
          *(row_ptr++) = color16;
      }
    }
    else
    {
      for (u32 yoffs = 0; yoffs < height; yoffs++)
      {
        const u32 row = (y + yoffs) % VRAM_HEIGHT;
        if ((row & u32(1)) == active_field)
          continue;

        u16* row_ptr = &g_vram[row * VRAM_WIDTH];
        for (u32 xoffs = 0; xoffs < width; xoffs++)
        {
          const u32 col = (x + xoffs) % VRAM_WIDTH;
          row_ptr[col] = color16;
        }
      }
    }
  }
  else
  {
    for (u32 yoffs = 0; yoffs < height; yoffs++)
    {
      const u32 row = (y + yoffs) % VRAM_HEIGHT;
      u16* row_ptr = &g_vram[row * VRAM_WIDTH];
      for (u32 xoffs = 0; xoffs < width; xoffs++)
      {
        const u32 col = (x + xoffs) % VRAM_WIDTH;
        row_ptr[col] = color16;
      }
    }
  }
  GPU_SW_Rasterizer::FillVRAM(x, y, width, height, color, params.interlaced_rendering, params.active_line_lsb);
}

void GPU_SW_Backend::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data,
                                GPUBackendCommandParameters params)
{
  // Fast path when the copy is not oversized.
  if ((x + width) <= VRAM_WIDTH && (y + height) <= VRAM_HEIGHT && !params.IsMaskingEnabled())
  {
    const u16* src_ptr = static_cast<const u16*>(data);
    u16* dst_ptr = &g_vram[y * VRAM_WIDTH + x];
    for (u32 yoffs = 0; yoffs < height; yoffs++)
    {
      std::copy_n(src_ptr, width, dst_ptr);
      src_ptr += width;
      dst_ptr += VRAM_WIDTH;
    }
  }
  else
  {
    // Slow path when we need to handle wrap-around.
    const u16* src_ptr = static_cast<const u16*>(data);
    const u16 mask_and = params.GetMaskAND();
    const u16 mask_or = params.GetMaskOR();

    for (u32 row = 0; row < height;)
    {
      u16* dst_row_ptr = &g_vram[((y + row++) % VRAM_HEIGHT) * VRAM_WIDTH];
      for (u32 col = 0; col < width;)
      {
        // TODO: Handle unaligned reads...
        u16* pixel_ptr = &dst_row_ptr[(x + col++) % VRAM_WIDTH];
        if (((*pixel_ptr) & mask_and) == 0)
          *pixel_ptr = *(src_ptr++) | mask_or;
      }
    }
  }
  GPU_SW_Rasterizer::WriteVRAM(x, y, width, height, data, params.set_mask_while_drawing, params.check_mask_before_draw);
}

void GPU_SW_Backend::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height,
                              GPUBackendCommandParameters params)
{
  // Break up oversized copies. This behavior has not been verified on console.
  if ((src_x + width) > VRAM_WIDTH || (dst_x + width) > VRAM_WIDTH)
  {
    u32 remaining_rows = height;
    u32 current_src_y = src_y;
    u32 current_dst_y = dst_y;
    while (remaining_rows > 0)
    {
      const u32 rows_to_copy =
        std::min<u32>(remaining_rows, std::min<u32>(VRAM_HEIGHT - current_src_y, VRAM_HEIGHT - current_dst_y));

      u32 remaining_columns = width;
      u32 current_src_x = src_x;
      u32 current_dst_x = dst_x;
      while (remaining_columns > 0)
      {
        const u32 columns_to_copy =
          std::min<u32>(remaining_columns, std::min<u32>(VRAM_WIDTH - current_src_x, VRAM_WIDTH - current_dst_x));
        CopyVRAM(current_src_x, current_src_y, current_dst_x, current_dst_y, columns_to_copy, rows_to_copy, params);
        current_src_x = (current_src_x + columns_to_copy) % VRAM_WIDTH;
        current_dst_x = (current_dst_x + columns_to_copy) % VRAM_WIDTH;
        remaining_columns -= columns_to_copy;
      }

      current_src_y = (current_src_y + rows_to_copy) % VRAM_HEIGHT;
      current_dst_y = (current_dst_y + rows_to_copy) % VRAM_HEIGHT;
      remaining_rows -= rows_to_copy;
    }

    return;
  }

  // This doesn't have a fast path, but do we really need one? It's not common.
  const u16 mask_and = params.GetMaskAND();
  const u16 mask_or = params.GetMaskOR();

  // Copy in reverse when src_x < dst_x, this is verified on console.
  if (src_x < dst_x || ((src_x + width - 1) % VRAM_WIDTH) < ((dst_x + width - 1) % VRAM_WIDTH))
  {
    for (u32 row = 0; row < height; row++)
    {
      const u16* src_row_ptr = &g_vram[((src_y + row) % VRAM_HEIGHT) * VRAM_WIDTH];
      u16* dst_row_ptr = &g_vram[((dst_y + row) % VRAM_HEIGHT) * VRAM_WIDTH];

      for (s32 col = static_cast<s32>(width - 1); col >= 0; col--)
      {
        const u16 src_pixel = src_row_ptr[(src_x + static_cast<u32>(col)) % VRAM_WIDTH];
        u16* dst_pixel_ptr = &dst_row_ptr[(dst_x + static_cast<u32>(col)) % VRAM_WIDTH];
        if ((*dst_pixel_ptr & mask_and) == 0)
          *dst_pixel_ptr = src_pixel | mask_or;
      }
    }
  }
  else
  {
    for (u32 row = 0; row < height; row++)
    {
      const u16* src_row_ptr = &g_vram[((src_y + row) % VRAM_HEIGHT) * VRAM_WIDTH];
      u16* dst_row_ptr = &g_vram[((dst_y + row) % VRAM_HEIGHT) * VRAM_WIDTH];

      for (u32 col = 0; col < width; col++)
      {
        const u16 src_pixel = src_row_ptr[(src_x + col) % VRAM_WIDTH];
        u16* dst_pixel_ptr = &dst_row_ptr[(dst_x + col) % VRAM_WIDTH];
        if ((*dst_pixel_ptr & mask_and) == 0)
          *dst_pixel_ptr = src_pixel | mask_or;
      }
    }
  }
  GPU_SW_Rasterizer::CopyVRAM(src_x, src_y, dst_x, dst_y, width, height, params.set_mask_while_drawing,
                              params.check_mask_before_draw);
}

void GPU_SW_Backend::UpdateCLUT(GPUTexturePaletteReg reg, bool clut_is_8bit)
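Note: the rewritten call sites above depend on free functions in the GPU_SW_Rasterizer namespace. The following is a minimal sketch of the declarations those calls assume, inferred only from the call sites in this diff; the actual header name, parameter types, and any additional arguments in the repository may differ.

// gpu_sw_rasterizer.h (hypothetical sketch; real header and signatures may differ)
#include <cstdint>

using u8 = std::uint8_t;   // aliases mirroring the project's fixed-width integer types (assumption)
using u32 = std::uint32_t;

namespace GPU_SW_Rasterizer {
// Chooses a software rasterizer implementation at startup (called from GPU_SW_Backend::Initialize above).
void SelectImplementation();

// Signatures inferred from the GPU_SW_Backend call sites in this commit.
void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color, bool interlaced_rendering, u8 active_line_lsb);
void WriteVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, bool set_mask_while_drawing,
               bool check_mask_before_draw);
void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height, bool set_mask_while_drawing,
              bool check_mask_before_draw);
} // namespace GPU_SW_Rasterizer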