SPU: Add time stretched audio output

This commit is contained in:
Connor McLaughlin
2022-07-28 00:42:41 +10:00
parent f54e32ff01
commit 68b5dd869c
27 changed files with 1165 additions and 808 deletions

View File

@ -1,13 +1,26 @@
#pragma once
#include "common/fifo_queue.h"
#include "common/types.h"
#include <array>
#include <atomic>
#include <condition_variable>
#include <memory>
#include <mutex>
#include <vector>
#include <optional>
// Uses signed 16-bits samples.
#ifdef _MSC_VER
#pragma warning(push)
#pragma warning(disable : 4324) // warning C4324: structure was padded due to alignment specifier
#endif
namespace soundtouch {
class SoundTouch;
}
enum class AudioStretchMode : u8
{
Off,
Resample,
TimeStretch,
Count
};
class AudioStream
{
@ -16,111 +29,116 @@ public:
enum : u32
{
DefaultInputSampleRate = 44100,
DefaultOutputSampleRate = 44100,
DefaultBufferSize = 2048,
MaxSamples = 32768,
FullVolume = 100
CHUNK_SIZE = 64,
MAX_CHANNELS = 2
};
AudioStream();
public:
virtual ~AudioStream();
u32 GetOutputSampleRate() const { return m_output_sample_rate; }
u32 GetChannels() const { return m_channels; }
u32 GetBufferSize() const { return m_buffer_size; }
s32 GetOutputVolume() const { return m_output_volume; }
bool IsSyncing() const { return m_sync; }
static u32 GetAlignedBufferSize(u32 size);
static u32 GetBufferSizeForMS(u32 sample_rate, u32 ms);
static u32 GetMSForBufferSize(u32 sample_rate, u32 buffer_size);
bool Reconfigure(u32 input_sample_rate = DefaultInputSampleRate, u32 output_sample_rate = DefaultOutputSampleRate,
u32 channels = 1, u32 buffer_size = DefaultBufferSize);
void SetSync(bool enable) { m_sync = enable; }
static const char* GetStretchModeName(AudioStretchMode mode);
static std::optional<AudioStretchMode> ParseStretchMode(const char* name);
void SetInputSampleRate(u32 sample_rate);
void SetWaitForBufferFill(bool enabled);
ALWAYS_INLINE u32 GetSampleRate() const { return m_sample_rate; }
ALWAYS_INLINE u32 GetChannels() const { return m_channels; }
ALWAYS_INLINE u32 GetBufferSize() const { return m_buffer_size; }
ALWAYS_INLINE u32 GetTargetBufferSize() const { return m_target_buffer_size; }
ALWAYS_INLINE u32 GetOutputVolume() const { return m_volume; }
ALWAYS_INLINE float GetNominalTempo() const { return m_nominal_rate; }
ALWAYS_INLINE bool IsPaused() const { return m_paused; }
u32 GetBufferedFramesRelaxed() const;
/// Temporarily pauses the stream, preventing it from requesting data.
virtual void SetPaused(bool paused);
virtual void SetOutputVolume(u32 volume);
void PauseOutput(bool paused);
void EmptyBuffers();
void Shutdown();
void BeginWrite(SampleType** buffer_ptr, u32* num_frames);
void WriteFrames(const SampleType* frames, u32 num_frames);
void EndWrite(u32 num_frames);
bool DidUnderflow()
{
bool expected = true;
return m_underflow_flag.compare_exchange_strong(expected, false);
}
void EmptyBuffer();
static std::unique_ptr<AudioStream> CreateNullAudioStream();
/// Nominal rate is used for both resampling and timestretching, input samples are assumed to be this amount faster
/// than the sample rate.
void SetNominalRate(float tempo);
// Latency computation - returns values in seconds
static float GetMaxLatency(u32 sample_rate, u32 buffer_size);
void SetStretchMode(AudioStretchMode mode);
static std::unique_ptr<AudioStream> CreateNullStream(u32 sample_rate, u32 channels, u32 buffer_ms);
protected:
virtual bool OpenDevice() = 0;
virtual void PauseDevice(bool paused) = 0;
virtual void CloseDevice() = 0;
virtual void FramesAvailable() = 0;
AudioStream(u32 sample_rate, u32 channels, u32 buffer_ms, AudioStretchMode stretch);
void BaseInitialize();
ALWAYS_INLINE static SampleType ApplyVolume(SampleType sample, u32 volume)
{
return s16((s32(sample) * s32(volume)) / 100);
}
void ReadFrames(s16* bData, u32 nSamples);
ALWAYS_INLINE u32 GetBufferSpace() const { return (m_max_samples - m_buffer.GetSize()); }
ALWAYS_INLINE void ReleaseBufferLock(std::unique_lock<std::mutex> lock)
{
// lock is released implicitly by destruction
m_buffer_draining_cv.notify_one();
}
bool SetBufferSize(u32 buffer_size);
bool IsDeviceOpen() const { return (m_output_sample_rate > 0); }
void EnsureBuffer(u32 size);
void LockedEmptyBuffers();
u32 GetSamplesAvailable() const;
u32 GetSamplesAvailableLocked() const;
void ReadFrames(SampleType* samples, u32 num_frames, bool apply_volume);
void DropFrames(u32 count);
void CreateResampler();
void DestroyResampler();
void ResetResampler();
void InternalSetInputSampleRate(u32 sample_rate);
void ResampleInput(std::unique_lock<std::mutex> buffer_lock);
u32 m_input_sample_rate = 0;
u32 m_output_sample_rate = 0;
u32 m_sample_rate = 0;
u32 m_channels = 0;
u32 m_buffer_ms = 0;
u32 m_volume = 0;
AudioStretchMode m_stretch_mode = AudioStretchMode::Off;
bool m_stretch_inactive = false;
bool m_filling = false;
bool m_paused = false;
private:
enum : u32
{
AVERAGING_BUFFER_SIZE = 256,
AVERAGING_WINDOW = 50,
STRETCH_RESET_THRESHOLD = 5,
TARGET_IPS = 691,
};
void AllocateBuffer();
void DestroyBuffer();
void InternalWriteFrames(s32* bData, u32 nFrames);
void StretchAllocate();
void StretchDestroy();
void StretchWrite();
void StretchUnderrun();
void StretchOverrun();
float AddAndGetAverageTempo(float val);
void UpdateStretchTempo();
u32 m_buffer_size = 0;
std::unique_ptr<s32[]> m_buffer;
// volume, 0-100
u32 m_output_volume = FullVolume;
std::atomic<u32> m_rpos{0};
std::atomic<u32> m_wpos{0};
HeapFIFOQueue<SampleType, MaxSamples> m_buffer;
mutable std::mutex m_buffer_mutex;
std::condition_variable m_buffer_draining_cv;
std::vector<SampleType> m_resample_buffer;
std::unique_ptr<soundtouch::SoundTouch> m_soundtouch;
std::atomic_bool m_underflow_flag{false};
std::atomic_bool m_buffer_filling{false};
u32 m_max_samples = 0;
u32 m_target_buffer_size = 0;
u32 m_stretch_reset = STRETCH_RESET_THRESHOLD;
bool m_output_paused = true;
bool m_sync = true;
bool m_wait_for_buffer_fill = false;
u32 m_stretch_ok_count = 0;
float m_nominal_rate = 1.0f;
float m_dynamic_target_usage = 0.0f;
// Resampling
double m_resampler_ratio = 1.0;
void* m_resampler_state = nullptr;
std::mutex m_resampler_mutex;
HeapFIFOQueue<SampleType, MaxSamples> m_resampled_buffer;
std::vector<float> m_resample_in_buffer;
std::vector<float> m_resample_out_buffer;
};
u32 m_average_position = 0;
u32 m_average_available = 0;
u32 m_staging_buffer_pos = 0;
std::array<float, AVERAGING_BUFFER_SIZE> m_average_fullness = {};
// temporary staging buffer, used for timestretching
alignas(16) std::array<s32, CHUNK_SIZE> m_staging_buffer;
// float buffer, soundtouch only accepts float samples as input
alignas(16) std::array<float, CHUNK_SIZE * MAX_CHANNELS> m_float_buffer;
};
#ifdef _MSC_VER
#pragma warning(pop)
#endif