MDEC: Vectorize 16 and 24-bit copy-out

Speeds up FMV playback by 15% (16-bit copy-out) and 6% (24-bit copy-out), respectively.
This commit is contained in:
Stenzek
2024-09-02 17:57:26 +10:00
parent 63abdb7afd
commit 6a3b1a2725
2 changed files with 73 additions and 1 deletions

View File

@@ -11,6 +11,7 @@
#include <type_traits>
#if defined(CPU_ARCH_X86) || defined(CPU_ARCH_X64)
#define CPU_ARCH_SIMD 1
#define CPU_ARCH_SSE 1
#include <emmintrin.h>
#include <tmmintrin.h>
@@ -28,6 +29,7 @@
#define CPU_ARCH_SSE41 1
#endif
#elif defined(CPU_ARCH_ARM32) || defined(CPU_ARCH_ARM64)
#define CPU_ARCH_SIMD 1
#define CPU_ARCH_NEON 1
#if defined(_MSC_VER) && !defined(__clang__)
#include <arm64_neon.h>