SPU: Add time stretched audio output

2022-07-28 00:42:41 +10:00
parent f54e32ff01
commit 68b5dd869c
27 changed files with 1165 additions and 808 deletions
--- a/src/util/audio_stream.h
+++ b/src/util/audio_stream.h
@ -1,13 +1,26 @@
 #pragma once
-#include "common/fifo_queue.h"
 #include "common/types.h"
+#include <array>
 #include <atomic>
-#include <condition_variable>
 #include <memory>
-#include <mutex>
-#include <vector>
+#include <optional>

-// Uses signed 16-bits samples.
+#ifdef _MSC_VER
+#pragma warning(push)
+#pragma warning(disable : 4324) // warning C4324: structure was padded due to alignment specifier
+#endif
+
+namespace soundtouch {
+class SoundTouch;
+}
+
+enum class AudioStretchMode : u8
+{
+  Off,
+  Resample,
+  TimeStretch,
+  Count
+};

 class AudioStream
 {
@ -16,111 +29,116 @@ public:

  enum : u32
  {
-    DefaultInputSampleRate = 44100,
-    DefaultOutputSampleRate = 44100,
-    DefaultBufferSize = 2048,
-    MaxSamples = 32768,
-    FullVolume = 100
+    CHUNK_SIZE = 64,
+    MAX_CHANNELS = 2
  };

-  AudioStream();
+public:
  virtual ~AudioStream();

-  u32 GetOutputSampleRate() const { return m_output_sample_rate; }
-  u32 GetChannels() const { return m_channels; }
-  u32 GetBufferSize() const { return m_buffer_size; }
-  s32 GetOutputVolume() const { return m_output_volume; }
-  bool IsSyncing() const { return m_sync; }
+  static u32 GetAlignedBufferSize(u32 size);
+  static u32 GetBufferSizeForMS(u32 sample_rate, u32 ms);
+  static u32 GetMSForBufferSize(u32 sample_rate, u32 buffer_size);

-  bool Reconfigure(u32 input_sample_rate = DefaultInputSampleRate, u32 output_sample_rate = DefaultOutputSampleRate,
-                   u32 channels = 1, u32 buffer_size = DefaultBufferSize);
-  void SetSync(bool enable) { m_sync = enable; }
+  static const char* GetStretchModeName(AudioStretchMode mode);
+  static std::optional<AudioStretchMode> ParseStretchMode(const char* name);

-  void SetInputSampleRate(u32 sample_rate);
-  void SetWaitForBufferFill(bool enabled);
+  ALWAYS_INLINE u32 GetSampleRate() const { return m_sample_rate; }
+  ALWAYS_INLINE u32 GetChannels() const { return m_channels; }
+  ALWAYS_INLINE u32 GetBufferSize() const { return m_buffer_size; }
+  ALWAYS_INLINE u32 GetTargetBufferSize() const { return m_target_buffer_size; }
+  ALWAYS_INLINE u32 GetOutputVolume() const { return m_volume; }
+  ALWAYS_INLINE float GetNominalTempo() const { return m_nominal_rate; }
+  ALWAYS_INLINE bool IsPaused() const { return m_paused; }
+
+  u32 GetBufferedFramesRelaxed() const;
+
+  /// Temporarily pauses the stream, preventing it from requesting data.
+  virtual void SetPaused(bool paused);

  virtual void SetOutputVolume(u32 volume);

-  void PauseOutput(bool paused);
-  void EmptyBuffers();
-
-  void Shutdown();
-
  void BeginWrite(SampleType** buffer_ptr, u32* num_frames);
  void WriteFrames(const SampleType* frames, u32 num_frames);
  void EndWrite(u32 num_frames);

-  bool DidUnderflow()
-  {
-    bool expected = true;
-    return m_underflow_flag.compare_exchange_strong(expected, false);
-  }
+  void EmptyBuffer();

-  static std::unique_ptr<AudioStream> CreateNullAudioStream();
+  /// Nominal rate is used for both resampling and timestretching, input samples are assumed to be this amount faster
+  /// than the sample rate.
+  void SetNominalRate(float tempo);

-  // Latency computation - returns values in seconds
-  static float GetMaxLatency(u32 sample_rate, u32 buffer_size);
+  void SetStretchMode(AudioStretchMode mode);
+
+  static std::unique_ptr<AudioStream> CreateNullStream(u32 sample_rate, u32 channels, u32 buffer_ms);

 protected:
-  virtual bool OpenDevice() = 0;
-  virtual void PauseDevice(bool paused) = 0;
-  virtual void CloseDevice() = 0;
-  virtual void FramesAvailable() = 0;
+  AudioStream(u32 sample_rate, u32 channels, u32 buffer_ms, AudioStretchMode stretch);
+  void BaseInitialize();

-  ALWAYS_INLINE static SampleType ApplyVolume(SampleType sample, u32 volume)
-  {
-    return s16((s32(sample) * s32(volume)) / 100);
-  }
+  void ReadFrames(s16* bData, u32 nSamples);

-  ALWAYS_INLINE u32 GetBufferSpace() const { return (m_max_samples - m_buffer.GetSize()); }
-  ALWAYS_INLINE void ReleaseBufferLock(std::unique_lock<std::mutex> lock)
-  {
-    // lock is released implicitly by destruction
-    m_buffer_draining_cv.notify_one();
-  }
-
-  bool SetBufferSize(u32 buffer_size);
-  bool IsDeviceOpen() const { return (m_output_sample_rate > 0); }
-
-  void EnsureBuffer(u32 size);
-  void LockedEmptyBuffers();
-  u32 GetSamplesAvailable() const;
-  u32 GetSamplesAvailableLocked() const;
-  void ReadFrames(SampleType* samples, u32 num_frames, bool apply_volume);
-  void DropFrames(u32 count);
-
-  void CreateResampler();
-  void DestroyResampler();
-  void ResetResampler();
-  void InternalSetInputSampleRate(u32 sample_rate);
-  void ResampleInput(std::unique_lock<std::mutex> buffer_lock);
-
-  u32 m_input_sample_rate = 0;
-  u32 m_output_sample_rate = 0;
+  u32 m_sample_rate = 0;
  u32 m_channels = 0;
+  u32 m_buffer_ms = 0;
+  u32 m_volume = 0;
+
+  AudioStretchMode m_stretch_mode = AudioStretchMode::Off;
+  bool m_stretch_inactive = false;
+  bool m_filling = false;
+  bool m_paused = false;
+
+private:
+  enum : u32
+  {
+    AVERAGING_BUFFER_SIZE = 256,
+    AVERAGING_WINDOW = 50,
+    STRETCH_RESET_THRESHOLD = 5,
+    TARGET_IPS = 691,
+  };
+
+  void AllocateBuffer();
+  void DestroyBuffer();
+
+  void InternalWriteFrames(s32* bData, u32 nFrames);
+
+  void StretchAllocate();
+  void StretchDestroy();
+  void StretchWrite();
+  void StretchUnderrun();
+  void StretchOverrun();
+
+  float AddAndGetAverageTempo(float val);
+  void UpdateStretchTempo();
+
  u32 m_buffer_size = 0;
+  std::unique_ptr<s32[]> m_buffer;

-  // volume, 0-100
-  u32 m_output_volume = FullVolume;
+  std::atomic<u32> m_rpos{0};
+  std::atomic<u32> m_wpos{0};

-  HeapFIFOQueue<SampleType, MaxSamples> m_buffer;
-  mutable std::mutex m_buffer_mutex;
-  std::condition_variable m_buffer_draining_cv;
-  std::vector<SampleType> m_resample_buffer;
+  std::unique_ptr<soundtouch::SoundTouch> m_soundtouch;

-  std::atomic_bool m_underflow_flag{false};
-  std::atomic_bool m_buffer_filling{false};
-  u32 m_max_samples = 0;
+  u32 m_target_buffer_size = 0;
+  u32 m_stretch_reset = STRETCH_RESET_THRESHOLD;

-  bool m_output_paused = true;
-  bool m_sync = true;
-  bool m_wait_for_buffer_fill = false;
+  u32 m_stretch_ok_count = 0;
+  float m_nominal_rate = 1.0f;
+  float m_dynamic_target_usage = 0.0f;

-  // Resampling
-  double m_resampler_ratio = 1.0;
-  void* m_resampler_state = nullptr;
-  std::mutex m_resampler_mutex;
-  HeapFIFOQueue<SampleType, MaxSamples> m_resampled_buffer;
-  std::vector<float> m_resample_in_buffer;
-  std::vector<float> m_resample_out_buffer;
-};
+  u32 m_average_position = 0;
+  u32 m_average_available = 0;
+  u32 m_staging_buffer_pos = 0;
+
+  std::array<float, AVERAGING_BUFFER_SIZE> m_average_fullness = {};
+
+  // temporary staging buffer, used for timestretching
+  alignas(16) std::array<s32, CHUNK_SIZE> m_staging_buffer;
+
+  // float buffer, soundtouch only accepts float samples as input
+  alignas(16) std::array<float, CHUNK_SIZE * MAX_CHANNELS> m_float_buffer;
+};
+
+#ifdef _MSC_VER
+#pragma warning(pop)
+#endif