From baab00f49b84a9fed5136854f32aaecae57675fc Mon Sep 17 00:00:00 2001 From: Sine Striker Date: Wed, 12 Jun 2024 02:50:26 +0800 Subject: [PATCH 1/8] Implement file decoder --- TuneLab/Audio/FFmpeg/FFmpegCodec.cs | 321 +++++++++++++++++++++++++++ TuneLab/Audio/FFmpeg/SimpleVector.cs | 74 ++++++ 2 files changed, 395 insertions(+) create mode 100644 TuneLab/Audio/FFmpeg/FFmpegCodec.cs create mode 100644 TuneLab/Audio/FFmpeg/SimpleVector.cs diff --git a/TuneLab/Audio/FFmpeg/FFmpegCodec.cs b/TuneLab/Audio/FFmpeg/FFmpegCodec.cs new file mode 100644 index 00000000..22a3a1ec --- /dev/null +++ b/TuneLab/Audio/FFmpeg/FFmpegCodec.cs @@ -0,0 +1,321 @@ +using System; +using System.Collections.Generic; +using System.IO; +using System.Runtime.InteropServices; +using System.Text; +using FFmpeg.AutoGen; +using Buffer = System.Buffer; +using Marshal = System.Runtime.InteropServices.Marshal; + +namespace TuneLab.Audio.FFmpeg; + +internal class FFmpegCodec : IAudioCodec +{ + public IEnumerable AllDecodableFormats { get; } = ["wav", "mp3", "aiff", "aac", "wma", "mp4"]; + + public AudioInfo GetAudioInfo(string path) + { + return new AudioInfo(); + } + + public IAudioStream Decode(string path) + { + return new FileAudioStream(path); + } + + public void EncodeToWav(string path, float[] buffer, int samplingRate, int bitPerSample, int channelCount) + { + } + + public IAudioStream Resample(IAudioProvider input, int outputSamplingRate) + { + return null; + } + + private unsafe class FileAudioStream : IAudioStream + { + public int SamplingRate => _codecContext != null ? _codecContext->sample_rate : 0; + public int ChannelCount => _codecContext != null ? _codecContext->ch_layout.nb_channels : 0; + public int SamplesPerChannel => (int)_samples; + + public FileAudioStream(string fileName) + { + try + { + OpenFile(fileName); + } + catch (Exception e) + { + CloseFile(); + throw; + } + } + + public void Dispose() + { + CloseFile(); + } + + public void Read(float[] buffer, int offset, int count) + { + } + + // 文件信息 + private string _fileName; + + // FFmpeg 指针 + private AVFormatContext* _formatContext; + private AVCodecContext* _codecContext; + private AVPacket* _packet; + private AVFrame* _frame; + + // 音频信息 + private int _audioIndex; // 音频流序号 + private long _samples; // 不包括声道 + + // 内部缓冲区相关 + SimpleVector _cachedBuffer; // 缓冲区 + int _cachedBufferPos; // 缓冲区读取位置 + + // 打开音频 + private void OpenFile(string fileName) + { + _fileName = fileName; + + var fmt_ctx = ffmpeg.avformat_alloc_context(); + _formatContext = fmt_ctx; + + // 打开文件 + var ret = ffmpeg.avformat_open_input(&fmt_ctx, fileName, null, null); + if (ret != 0) + { + throw new FileLoadException($"FFmpeg: Failed to load file {fileName}.", fileName); + } + + // 查找流信息 + ret = ffmpeg.avformat_find_stream_info(fmt_ctx, null); + if (ret < 0) + { + throw new DecoderFallbackException("FFmpeg: Failed to find streams."); + } + + // 查找音频流 + var audio_idx = ffmpeg.av_find_best_stream(fmt_ctx, AVMediaType.AVMEDIA_TYPE_AUDIO, + -1, -1, null, 0); + if (audio_idx < 0) + { + throw new DecoderFallbackException("FFmpeg: Failed to find audio stream."); + } + + _audioIndex = audio_idx; + + // 查找解码器 + var stream = fmt_ctx->streams[audio_idx]; + var codec_param = stream->codecpar; + var codec = ffmpeg.avcodec_find_decoder(codec_param->codec_id); + if (codec == null) + { + throw new DecoderFallbackException("FFmpeg: Failed to find decoder."); + } + + // 分配解码器上下文 + var codec_ctx = ffmpeg.avcodec_alloc_context3(null); + _codecContext = codec_ctx; + + // 传递解码器信息 + ret = 
ffmpeg.avcodec_parameters_to_context(codec_ctx, codec_param); + if (ret < 0) + { + throw new DecoderFallbackException("FFmpeg: Failed to pass params to codec."); + } + + // 打开解码器 + ret = ffmpeg.avcodec_open2(codec_ctx, codec, null); + if (ret < 0) + { + throw new DecoderFallbackException("FFmpeg: Failed to open decoder."); + } + + _samples = (long)(stream->duration * codec_ctx->sample_rate * + stream->time_base.num / (float)stream->time_base.den + ); + + // 初始化缓冲区 + _cachedBuffer = new SimpleVector(); + _cachedBufferPos = 0; + } + + private void CloseFile() + { + var fmt_ctx = _formatContext; + var codec_ctx = _codecContext; + + var pkt = _packet; + var frame = _frame; + + if (frame != null) + { + ffmpeg.av_frame_free(&frame); + } + + if (pkt != null) + { + ffmpeg.av_packet_free(&pkt); + } + + if (codec_ctx != null) + { + // ffmpeg.avcodec_close(codec_ctx); + ffmpeg.avcodec_free_context(&codec_ctx); + } + + if (fmt_ctx != null) + { + ffmpeg.avformat_close_input(&fmt_ctx); + } + + _samples = 0; + _audioIndex = 0; + + _frame = null; + _packet = null; + _codecContext = null; + _formatContext = null; + + _fileName = string.Empty; + } + + private int Decode(byte[] buf, int requiredSize) + { + var fmt_ctx = _formatContext; + var codec_ctx = _codecContext; + + var pkt = _packet; + var frame = _frame; + + // 采取边解码边写到输出缓冲区的方式。策略是先把 cache 全部写出,然后边解码边写,写到最后剩下的再存入 cache + int bytesWritten = 0; + { + var cacheSize = Math.Min(_cachedBuffer.Size - _cachedBufferPos, requiredSize); + if (cacheSize > 0) + { + Buffer.BlockCopy(_cachedBuffer.Data, _cachedBufferPos, buf, 0, cacheSize); + _cachedBufferPos += cacheSize; + bytesWritten = cacheSize; + } + } + + while (bytesWritten < requiredSize) + { + int ret = ffmpeg.av_read_frame(fmt_ctx, pkt); + + // 判断是否结束 + if (ret == ffmpeg.AVERROR_EOF) + { + ffmpeg.av_packet_unref(pkt); + break; + } + + if (ret != 0) + { + // 忽略 + Console.WriteLine($"FFmpeg: Error getting next frame with code {-ret:x}, ignored."); + continue; + } + + // 跳过其他流 + if (pkt->stream_index != _audioIndex) + { + ffmpeg.av_packet_unref(pkt); + continue; + } + + // 发送待解码包 + ret = ffmpeg.avcodec_send_packet(codec_ctx, pkt); + ffmpeg.av_packet_unref(pkt); + if (ret < 0) + { + // 忽略 + Console.WriteLine($"FFmpeg: Error submitting a packet for decoding with code {-ret:x}, ignored."); + continue; + } + + while (ret >= 0) + { + // 接收解码数据 + ret = ffmpeg.avcodec_receive_frame(codec_ctx, frame); + if (ret == ffmpeg.AVERROR_EOF || ret == ffmpeg.AVERROR(ffmpeg.EAGAIN)) + { + // 结束 + break; + } + else if (ret < 0) + { + // 出错 + ffmpeg.av_frame_unref(frame); + + // 忽略 + Console.WriteLine($"FFmpeg: Error decoding frame with code {-ret:x}, ignored."); + continue; + } + + int size_need = requiredSize - bytesWritten; + int size_supply = ffmpeg.av_samples_get_buffer_size(null, frame->ch_layout.nb_channels, + frame->nb_samples, (AVSampleFormat)frame->format, 1); + + var arr = frame->data[0]; + + var size_cached = size_supply - size_need; + if (size_cached > 0) + { + // 写到输出缓冲区 + Marshal.Copy((IntPtr)arr, buf, bytesWritten, size_need); + + // 剩下的存入 cache + _cachedBuffer.Resize(size_cached); + Marshal.Copy(IntPtr.Add((IntPtr)arr, size_need), _cachedBuffer.Data, 0, size_cached); + _cachedBufferPos = 0; + + bytesWritten = requiredSize; + } + else + { + // 全部写到输出缓冲区 + Marshal.Copy((IntPtr)arr, buf, bytesWritten, size_supply); + bytesWritten += size_supply; + } + + ffmpeg.av_frame_unref(frame); + } + } + + return bytesWritten; + } + } + + protected unsafe class ResampledAudioStream : IAudioStream + { + public int 
SamplingRate { get; } + public int ChannelCount { get; } + public int SamplesPerChannel { get; } + + public ResampledAudioStream(IAudioStream input, int sampleRate) + { + SamplingRate = sampleRate; + ChannelCount = input.ChannelCount; + SamplesPerChannel = (int)((long)input.SamplesPerChannel * sampleRate / input.SamplingRate); + + _stream = input; + } + + public void Read(float[] buffer, int offset, int count) + { + } + + public void Dispose() + { + } + + private IAudioStream _stream; + } +} \ No newline at end of file diff --git a/TuneLab/Audio/FFmpeg/SimpleVector.cs b/TuneLab/Audio/FFmpeg/SimpleVector.cs new file mode 100644 index 00000000..1e4458e4 --- /dev/null +++ b/TuneLab/Audio/FFmpeg/SimpleVector.cs @@ -0,0 +1,74 @@ +using System; + +namespace TuneLab.Audio.FFmpeg; + +public class SimpleVector +{ + private T[] _data; + private int _size; + private int _capacity; + + public SimpleVector(int capacity = 10) + { + _size = 0; + _capacity = capacity; + _data = new T[_capacity]; + } + + public int Size => _size; + public int Capacity => _capacity; + public T this[int index] => _data[index]; + public T[] Data => _data; + + public void Append(T item) + { + var newSize = _size + 1; + if (newSize > _capacity) + { + AllocateSpace(newSize * 2); + } + + _data[_size] = item; + _size = newSize; + } + + public void Append(params T[] items) + { + var newSize = _size + items.Length; + if (newSize > _capacity) + { + AllocateSpace(newSize * 2); + } + + Array.Copy(items, 0, _data, _size, items.Length); + _size = newSize; + } + + public void Resize(int newSize) + { + if (newSize < 0) + throw new ArgumentOutOfRangeException(nameof(newSize), "New size must be non-negative."); + if (newSize > _capacity) + { + AllocateSpace(newSize); + } + + _size = newSize; + } + + public void Reserve(int newCapacity) + { + if (newCapacity > _capacity) + { + AllocateSpace(newCapacity); + } + } + + private void AllocateSpace(int capacity) + { + var newItems = new T[capacity]; + Array.Copy(_data, 0, newItems, 0, Math.Min(_size, capacity)); + _data = newItems; + _capacity = capacity; + } +} \ No newline at end of file From f2ae92c430c2c4127fa9a9f446e274ed4b25cd84 Mon Sep 17 00:00:00 2001 From: Sine Striker Date: Thu, 13 Jun 2024 00:47:11 +0800 Subject: [PATCH 2/8] Finish file decoder --- TuneLab/Audio/FFmpeg/FFmpegCodec.cs | 324 ++++++++++++++++++++++++--- TuneLab/Audio/FFmpeg/SimpleVector.cs | 74 ------ 2 files changed, 295 insertions(+), 103 deletions(-) delete mode 100644 TuneLab/Audio/FFmpeg/SimpleVector.cs diff --git a/TuneLab/Audio/FFmpeg/FFmpegCodec.cs b/TuneLab/Audio/FFmpeg/FFmpegCodec.cs index 22a3a1ec..c2b7c461 100644 --- a/TuneLab/Audio/FFmpeg/FFmpegCodec.cs +++ b/TuneLab/Audio/FFmpeg/FFmpegCodec.cs @@ -1,18 +1,114 @@ using System; +using System.Collections; using System.Collections.Generic; using System.IO; +using System.Reflection; using System.Runtime.InteropServices; using System.Text; using FFmpeg.AutoGen; +using NAudio.Wave; +using TuneLab.Audio.NAudio; +using TuneLab.Base.Utils; using Buffer = System.Buffer; -using Marshal = System.Runtime.InteropServices.Marshal; +using FFmpegNative = FFmpeg.AutoGen.Native; namespace TuneLab.Audio.FFmpeg; +internal static class Utils +{ + public static T[] GetBuffer(this List list) + { + var fieldInfo = list.GetType().GetField("_items", BindingFlags.NonPublic | BindingFlags.Instance); + return (T[])fieldInfo!.GetValue(list)!; + } + + public static unsafe void BytesToFloats(float[] dest, int destIndex, byte[] bytes, int bytesSize, + int channelCount) + { + int sizeOfT = 
Marshal.SizeOf(); + int totalSamples = bytesSize / (sizeOfT * channelCount) * channelCount; + + fixed (byte* src = bytes) + { + if (typeof(T) == typeof(byte)) + { + const float max = byte.MaxValue; + for (var i = 0; i < totalSamples; i++) + { + var intPtr = src + i * sizeOfT; + dest[i + destIndex] = *intPtr / max; + } + + return; + } + + if (typeof(T) == typeof(int)) + { + const float max = int.MaxValue; + for (var i = 0; i < totalSamples; i++) + { + var intPtr = (int*)(src + i * sizeOfT); + dest[i + destIndex] = *intPtr / max; + } + + return; + } + + if (typeof(T) == typeof(short)) + { + const float max = short.MaxValue; + for (var i = 0; i < totalSamples; i++) + { + var shortPtr = (short*)(src + i * sizeOfT); + dest[i] = *shortPtr / max; + } + + return; + } + + if (typeof(T) == typeof(float)) + { + for (var i = 0; i < totalSamples; i++) + { + var floatPtr = (float*)(src + i * sizeOfT); + dest[i] = *floatPtr; + } + + return; + } + + if (typeof(T) == typeof(double)) + { + for (var i = 0; i < totalSamples; i++) + { + var doublePtr = (double*)(src + i * sizeOfT); + dest[i] = (float)*doublePtr; + } + } + } + } +} + internal class FFmpegCodec : IAudioCodec { public IEnumerable AllDecodableFormats { get; } = ["wav", "mp3", "aiff", "aac", "wma", "mp4"]; + public FFmpegCodec(string libraryDir) + { + var libs = new[] { "avcodec", "avutil", "avformat", "swresample" }; + ffmpeg.RootPath = libraryDir; + foreach (var lib in libs) + { + var ver = ffmpeg.LibraryVersionMap[lib]; + var nativeLibraryName = FFmpegNative.LibraryLoader.GetNativeLibraryName(lib, ver); + var fullName = Path.Combine(ffmpeg.RootPath, nativeLibraryName); + if (!File.Exists(fullName)) + { + throw new FileNotFoundException($"FFmpeg library {fullName} not found!"); + } + } + } + public AudioInfo GetAudioInfo(string path) { return new AudioInfo(); @@ -20,32 +116,40 @@ public AudioInfo GetAudioInfo(string path) public IAudioStream Decode(string path) { - return new FileAudioStream(path); + return new FileDecoderStream(path); } public void EncodeToWav(string path, float[] buffer, int samplingRate, int bitPerSample, int channelCount) { + WaveFormat waveFormat = new WaveFormat(samplingRate, 16, channelCount); + using WaveFileWriter writer = new WaveFileWriter(path, waveFormat); + var bytes = NAudioCodec.To16BitsBytes(buffer); + writer.Write(bytes, 0, bytes.Length); } public IAudioStream Resample(IAudioProvider input, int outputSamplingRate) { - return null; + // return new ResampledAudioStream(input, outputSamplingRate); + return new NAudioCodec.NAudioResamplerStream(input, outputSamplingRate); } - private unsafe class FileAudioStream : IAudioStream + private unsafe class FileDecoderStream : IAudioStream { public int SamplingRate => _codecContext != null ? _codecContext->sample_rate : 0; public int ChannelCount => _codecContext != null ? 
_codecContext->ch_layout.nb_channels : 0; public int SamplesPerChannel => (int)_samples; - public FileAudioStream(string fileName) + public FileDecoderStream(string fileName) { + _cachedBuffer = new List(); + _cachedBufferPos = 0; try { OpenFile(fileName); } catch (Exception e) { + Console.WriteLine(e); CloseFile(); throw; } @@ -53,11 +157,67 @@ public FileAudioStream(string fileName) public void Dispose() { - CloseFile(); + if (_fileName != string.Empty) + { + CloseFile(); + } } public void Read(float[] buffer, int offset, int count) { + var channels = ChannelCount; + var samplesPerChannel = count / channels; + + // 解码 + var bytesRequired = ffmpeg.av_samples_get_buffer_size(null, channels, + samplesPerChannel, _format, 1); + var bytes = new byte[bytesRequired]; + var bytesRead = Decode(bytes, bytesRequired); + if (bytesRead == 0) + { + return; + } + + // 将解码的字节流转为浮点 + switch (_format) + { + case AVSampleFormat.AV_SAMPLE_FMT_U8: + case AVSampleFormat.AV_SAMPLE_FMT_U8P: + { + Utils.BytesToFloats(buffer, offset, bytes, bytesRead, channels); + break; + } + case AVSampleFormat.AV_SAMPLE_FMT_S16: + case AVSampleFormat.AV_SAMPLE_FMT_S16P: + { + Utils.BytesToFloats(buffer, offset, bytes, bytesRead, channels); + break; + } + case AVSampleFormat.AV_SAMPLE_FMT_S32: + case AVSampleFormat.AV_SAMPLE_FMT_S32P: + { + Utils.BytesToFloats(buffer, offset, bytes, bytesRead, channels); + break; + } + case AVSampleFormat.AV_SAMPLE_FMT_FLT: + case AVSampleFormat.AV_SAMPLE_FMT_FLTP: + { + Utils.BytesToFloats(buffer, offset, bytes, bytesRead, channels); + break; + } + case AVSampleFormat.AV_SAMPLE_FMT_DBL: + case AVSampleFormat.AV_SAMPLE_FMT_DBLP: + { + Utils.BytesToFloats(buffer, offset, bytes, bytesRead, channels); + break; + } + case AVSampleFormat.AV_SAMPLE_FMT_S64: + case AVSampleFormat.AV_SAMPLE_FMT_S64P: + { + Utils.BytesToFloats(buffer, offset, bytes, bytesRead, channels); + break; + } + } } // 文件信息 @@ -72,9 +232,10 @@ public void Read(float[] buffer, int offset, int count) // 音频信息 private int _audioIndex; // 音频流序号 private long _samples; // 不包括声道 + private AVSampleFormat _format; // 内部缓冲区相关 - SimpleVector _cachedBuffer; // 缓冲区 + List _cachedBuffer; // 缓冲区 int _cachedBufferPos; // 缓冲区读取位置 // 打开音频 @@ -139,10 +300,15 @@ private void OpenFile(string fileName) _samples = (long)(stream->duration * codec_ctx->sample_rate * stream->time_base.num / (float)stream->time_base.den ); - - // 初始化缓冲区 - _cachedBuffer = new SimpleVector(); - _cachedBufferPos = 0; + _format = (AVSampleFormat)codec_param->format; + + // 初始化数据包和数据帧 + var pkt = ffmpeg.av_packet_alloc(); + var frame = ffmpeg.av_frame_alloc(); + + // 等待进一步的解码 + _packet = pkt; + _frame = frame; } private void CloseFile() @@ -174,7 +340,12 @@ private void CloseFile() ffmpeg.avformat_close_input(&fmt_ctx); } + _cachedBuffer.Clear(); + _cachedBufferPos = 0; + + _format = 0; _samples = 0; + _audioIndex = 0; _frame = null; @@ -196,15 +367,22 @@ private int Decode(byte[] buf, int requiredSize) // 采取边解码边写到输出缓冲区的方式。策略是先把 cache 全部写出,然后边解码边写,写到最后剩下的再存入 cache int bytesWritten = 0; { - var cacheSize = Math.Min(_cachedBuffer.Size - _cachedBufferPos, requiredSize); + var cacheSize = Math.Min(_cachedBuffer.Count - _cachedBufferPos, requiredSize); if (cacheSize > 0) { - Buffer.BlockCopy(_cachedBuffer.Data, _cachedBufferPos, buf, 0, cacheSize); + Buffer.BlockCopy(_cachedBuffer.GetBuffer(), _cachedBufferPos, buf, 0, cacheSize); _cachedBufferPos += cacheSize; bytesWritten = cacheSize; } + + // 如果 cache 用完了,那么清除 cache + if (_cachedBufferPos == _cachedBuffer.Count) + { + 
_cachedBuffer.Resize(0); + } } + // 如果 cache 不够需要,那么继续从音频中读取 while (bytesWritten < requiredSize) { int ret = ffmpeg.av_read_frame(fmt_ctx, pkt); @@ -249,7 +427,8 @@ private int Decode(byte[] buf, int requiredSize) // 结束 break; } - else if (ret < 0) + + if (ret < 0) { // 出错 ffmpeg.av_frame_unref(frame); @@ -259,21 +438,44 @@ private int Decode(byte[] buf, int requiredSize) continue; } - int size_need = requiredSize - bytesWritten; - int size_supply = ffmpeg.av_samples_get_buffer_size(null, frame->ch_layout.nb_channels, - frame->nb_samples, (AVSampleFormat)frame->format, 1); + var sampleFormat = (AVSampleFormat)frame->format; + var sampleCount = frame->nb_samples; + var channelCount = frame->ch_layout.nb_channels; - var arr = frame->data[0]; + var bytesNeeded = requiredSize - bytesWritten; + var bytesSupply = ffmpeg.av_samples_get_buffer_size(null, channelCount, + sampleCount, sampleFormat, 1); - var size_cached = size_supply - size_need; - if (size_cached > 0) + var nonPlainBuffer = new byte[bytesSupply]; + if (ffmpeg.av_sample_fmt_is_planar(sampleFormat) != 0) + { + // 平面格式 + var bytesPerSample = ffmpeg.av_get_bytes_per_sample(sampleFormat); + for (var i = 0; i < sampleCount; ++i) + { + for (var j = 0; j < channelCount; ++j) + { + var src = frame->extended_data[j] + i * bytesPerSample; + var dstIdx = (i * channelCount + j) * bytesPerSample; + Marshal.Copy((IntPtr)src, nonPlainBuffer, dstIdx, bytesPerSample); + } + } + } + else + { + // 交织格式 + Marshal.Copy((IntPtr)frame->data[0], nonPlainBuffer, 0, bytesSupply); + } + + var sizeToCache = bytesSupply - bytesNeeded; + if (sizeToCache > 0) { // 写到输出缓冲区 - Marshal.Copy((IntPtr)arr, buf, bytesWritten, size_need); + Buffer.BlockCopy(nonPlainBuffer, 0, buf, bytesWritten, bytesNeeded); // 剩下的存入 cache - _cachedBuffer.Resize(size_cached); - Marshal.Copy(IntPtr.Add((IntPtr)arr, size_need), _cachedBuffer.Data, 0, size_cached); + _cachedBuffer.Resize(sizeToCache); + Buffer.BlockCopy(nonPlainBuffer, bytesNeeded, _cachedBuffer.GetBuffer(), 0, sizeToCache); _cachedBufferPos = 0; bytesWritten = requiredSize; @@ -281,8 +483,8 @@ private int Decode(byte[] buf, int requiredSize) else { // 全部写到输出缓冲区 - Marshal.Copy((IntPtr)arr, buf, bytesWritten, size_supply); - bytesWritten += size_supply; + Buffer.BlockCopy(nonPlainBuffer, 0, buf, bytesWritten, bytesSupply); + bytesWritten += bytesSupply; } ffmpeg.av_frame_unref(frame); @@ -293,29 +495,93 @@ private int Decode(byte[] buf, int requiredSize) } } - protected unsafe class ResampledAudioStream : IAudioStream + private unsafe class ResampledAudioStream : IAudioStream { public int SamplingRate { get; } public int ChannelCount { get; } public int SamplesPerChannel { get; } - public ResampledAudioStream(IAudioStream input, int sampleRate) + public ResampledAudioStream(IAudioProvider input, int sampleRate) { SamplingRate = sampleRate; ChannelCount = input.ChannelCount; SamplesPerChannel = (int)((long)input.SamplesPerChannel * sampleRate / input.SamplingRate); - _stream = input; + _provider = input; + + try + { + OpenResampler(); + } + catch (Exception e) + { + CloseResampler(); + throw; + } } public void Read(float[] buffer, int offset, int count) { + var channels = ChannelCount; + var dstSamplesPerChannel = count / channels; + + var srcSamplesPerChannel = ffmpeg.av_rescale_rnd(ffmpeg.swr_get_delay(_swrContext, _provider.SamplingRate) + + dstSamplesPerChannel, _provider.SamplingRate, SamplingRate, + AVRounding.AV_ROUND_UP); } public void Dispose() { + if (_swrContext != null) + { + CloseResampler(); + } } - private 
IAudioStream _stream; + private readonly IAudioProvider _provider; + + // FFmpeg 数据 + private SwrContext* _swrContext; + + private void OpenResampler() + { + // 初始化重采样器 + var swr = ffmpeg.swr_alloc(); + _swrContext = swr; + + // 初始化输入输出声道 + int ret; + { + AVChannelLayout chLayout; + ffmpeg.av_channel_layout_default(&chLayout, ChannelCount); + + ret = ffmpeg.swr_alloc_set_opts2(&swr, &chLayout, AVSampleFormat.AV_SAMPLE_FMT_FLT, + SamplingRate, &chLayout, + AVSampleFormat.AV_SAMPLE_FMT_FLT, _provider.SamplingRate, 0, null); + + ffmpeg.av_channel_layout_uninit(&chLayout); + } + if (ret != 0) + { + throw new DecoderFallbackException("FFmpeg: Failed to create resampler."); + } + + ret = ffmpeg.swr_init(swr); + if (ret < 0) + { + throw new DecoderFallbackException("FFmpeg: Failed to init resampler."); + } + } + + private void CloseResampler() + { + var swr_ctx = _swrContext; + if (swr_ctx != null) + { + ffmpeg.swr_close(swr_ctx); + ffmpeg.swr_free(&swr_ctx); + } + _swrContext = null; + } } } \ No newline at end of file diff --git a/TuneLab/Audio/FFmpeg/SimpleVector.cs b/TuneLab/Audio/FFmpeg/SimpleVector.cs deleted file mode 100644 index 1e4458e4..00000000 --- a/TuneLab/Audio/FFmpeg/SimpleVector.cs +++ /dev/null @@ -1,74 +0,0 @@ -using System; - -namespace TuneLab.Audio.FFmpeg; - -public class SimpleVector -{ - private T[] _data; - private int _size; - private int _capacity; - - public SimpleVector(int capacity = 10) - { - _size = 0; - _capacity = capacity; - _data = new T[_capacity]; - } - - public int Size => _size; - public int Capacity => _capacity; - public T this[int index] => _data[index]; - public T[] Data => _data; - - public void Append(T item) - { - var newSize = _size + 1; - if (newSize > _capacity) - { - AllocateSpace(newSize * 2); - } - - _data[_size] = item; - _size = newSize; - } - - public void Append(params T[] items) - { - var newSize = _size + items.Length; - if (newSize > _capacity) - { - AllocateSpace(newSize * 2); - } - - Array.Copy(items, 0, _data, _size, items.Length); - _size = newSize; - } - - public void Resize(int newSize) - { - if (newSize < 0) - throw new ArgumentOutOfRangeException(nameof(newSize), "New size must be non-negative."); - if (newSize > _capacity) - { - AllocateSpace(newSize); - } - - _size = newSize; - } - - public void Reserve(int newCapacity) - { - if (newCapacity > _capacity) - { - AllocateSpace(newCapacity); - } - } - - private void AllocateSpace(int capacity) - { - var newItems = new T[capacity]; - Array.Copy(_data, 0, newItems, 0, Math.Min(_size, capacity)); - _data = newItems; - _capacity = capacity; - } -} \ No newline at end of file From e6158b836a1ca893bd35050c0665be1d07bdba49 Mon Sep 17 00:00:00 2001 From: Sine Striker Date: Sat, 15 Jun 2024 05:45:42 +0800 Subject: [PATCH 3/8] Implement FFmpeg Resampler --- TuneLab/Audio/FFmpeg/FFmpegCodec.cs | 655 +++++++++++++++++++--------- 1 file changed, 450 insertions(+), 205 deletions(-) diff --git a/TuneLab/Audio/FFmpeg/FFmpegCodec.cs b/TuneLab/Audio/FFmpeg/FFmpegCodec.cs index c2b7c461..89d4deab 100644 --- a/TuneLab/Audio/FFmpeg/FFmpegCodec.cs +++ b/TuneLab/Audio/FFmpeg/FFmpegCodec.cs @@ -1,7 +1,7 @@ using System; -using System.Collections; using System.Collections.Generic; using System.IO; +using System.Linq; using System.Reflection; using System.Runtime.InteropServices; using System.Text; @@ -22,68 +22,130 @@ public static T[] GetBuffer(this List list) return (T[])fieldInfo!.GetValue(list)!; } - public static unsafe void BytesToFloats(float[] dest, int destIndex, byte[] bytes, int bytesSize, - int 
channelCount) + public static unsafe void BytesToFloats(float* dest, byte* src, int size, int channelCount) { int sizeOfT = Marshal.SizeOf(); - int totalSamples = bytesSize / (sizeOfT * channelCount) * channelCount; + int totalSamples = size / (sizeOfT * channelCount) * channelCount; - fixed (byte* src = bytes) + if (typeof(T) == typeof(byte)) { - if (typeof(T) == typeof(byte)) + const float max = byte.MaxValue; + for (var i = 0; i < totalSamples; i++) { - const float max = byte.MaxValue; - for (var i = 0; i < totalSamples; i++) - { - var intPtr = src + i * sizeOfT; - dest[i + destIndex] = *intPtr / max; - } + var intPtr = src + i * sizeOfT; + dest[i] = *intPtr / max; + } - return; + return; + } + + if (typeof(T) == typeof(int)) + { + const float max = int.MaxValue; + for (var i = 0; i < totalSamples; i++) + { + var intPtr = (int*)(src + i * sizeOfT); + dest[i] = *intPtr / max; } - if (typeof(T) == typeof(int)) + return; + } + + if (typeof(T) == typeof(short)) + { + const float max = short.MaxValue; + for (var i = 0; i < totalSamples; i++) { - const float max = int.MaxValue; - for (var i = 0; i < totalSamples; i++) - { - var intPtr = (int*)(src + i * sizeOfT); - dest[i + destIndex] = *intPtr / max; - } + var shortPtr = (short*)(src + i * sizeOfT); + dest[i] = *shortPtr / max; + } - return; + return; + } + + if (typeof(T) == typeof(float)) + { + for (var i = 0; i < totalSamples; i++) + { + var floatPtr = (float*)(src + i * sizeOfT); + dest[i] = *floatPtr; } - if (typeof(T) == typeof(short)) + return; + } + + if (typeof(T) == typeof(double)) + { + for (var i = 0; i < totalSamples; i++) { - const float max = short.MaxValue; - for (var i = 0; i < totalSamples; i++) - { - var shortPtr = (short*)(src + i * sizeOfT); - dest[i] = *shortPtr / max; - } + var doublePtr = (double*)(src + i * sizeOfT); + dest[i] = (float)*doublePtr; + } + } + } - return; + public static unsafe void FloatsToBytes(byte* dest, float* src, int size, int channelCount) + { + int sizeOfT = Marshal.SizeOf(); + int totalSamples = size / channelCount * channelCount; + + if (typeof(T) == typeof(byte)) + { + const float max = byte.MaxValue; + for (var i = 0; i < totalSamples; i++) + { + var byteValue = (byte)(src[i] * max); + var bytePtr = dest + i * sizeOfT; + *bytePtr = byteValue; } - if (typeof(T) == typeof(float)) + return; + } + + if (typeof(T) == typeof(int)) + { + const float max = int.MaxValue; + for (var i = 0; i < totalSamples; i++) { - for (var i = 0; i < totalSamples; i++) - { - var floatPtr = (float*)(src + i * sizeOfT); - dest[i] = *floatPtr; - } + var intValue = (int)(src[i] * max); + var intPtr = (int*)(dest + i * sizeOfT); + *intPtr = intValue; + } - return; + return; + } + + if (typeof(T) == typeof(short)) + { + const float max = short.MaxValue; + for (var i = 0; i < totalSamples; i++) + { + var shortValue = (short)(src[i] * max); + var shortPtr = (short*)(dest + i * sizeOfT); + *shortPtr = shortValue; } - if (typeof(T) == typeof(double)) + return; + } + + if (typeof(T) == typeof(float)) + { + for (var i = 0; i < totalSamples; i++) { - for (var i = 0; i < totalSamples; i++) - { - var doublePtr = (double*)(src + i * sizeOfT); - dest[i] = (float)*doublePtr; - } + var floatPtr = (float*)(dest + i * sizeOfT); + *floatPtr = src[i]; + } + + return; + } + + if (typeof(T) == typeof(double)) + { + for (var i = 0; i < totalSamples; i++) + { + var doubleValue = (double)src[i]; + var doublePtr = (double*)(dest + i * sizeOfT); + *doublePtr = doubleValue; } } } @@ -91,7 +153,8 @@ public static unsafe void BytesToFloats(float[] 
dest, int destIndex, byte[] b internal class FFmpegCodec : IAudioCodec { - public IEnumerable AllDecodableFormats { get; } = ["wav", "mp3", "aiff", "aac", "wma", "mp4"]; + public IEnumerable AllDecodableFormats { get; } = + ["wav", "mp3", "aiff", "aac", "wma", "mp4", "m4a", "flac"]; public FFmpegCodec(string libraryDir) { @@ -111,7 +174,13 @@ public FFmpegCodec(string libraryDir) public AudioInfo GetAudioInfo(string path) { - return new AudioInfo(); + using (var stream = new FileDecoderStream(path)) + { + return new AudioInfo() + { + duration = stream.Duration(), + }; + } } public IAudioStream Decode(string path) @@ -129,11 +198,88 @@ public void EncodeToWav(string path, float[] buffer, int samplingRate, int bitPe public IAudioStream Resample(IAudioProvider input, int outputSamplingRate) { - // return new ResampledAudioStream(input, outputSamplingRate); - return new NAudioCodec.NAudioResamplerStream(input, outputSamplingRate); + return new ResampledAudioStream(input, outputSamplingRate); + // return new NAudioCodec.NAudioResamplerStream(input, outputSamplingRate); } - private unsafe class FileDecoderStream : IAudioStream + private abstract class CacheableStream + { + public CacheableStream() + { + _cachedBuffer = new List(); + _cachedBufferPos = 0; + } + + protected int Decode(byte[] buf, int requiredSize) + { + // 采取边解码边写到输出缓冲区的方式。策略是先把 cache 全部写出,然后边解码边写,写到最后剩下的再存入 cache + int bytesWritten = 0; + if (!_cachedBuffer.IsEmpty()) + { + var cacheSize = Math.Min(_cachedBuffer.Count - _cachedBufferPos, requiredSize); + if (cacheSize > 0) + { + Buffer.BlockCopy(_cachedBuffer.GetBuffer(), _cachedBufferPos, buf, 0, cacheSize); + _cachedBufferPos += cacheSize; + bytesWritten = cacheSize; + } + + // 如果 cache 用完了,那么清除 cache + if (_cachedBufferPos == _cachedBuffer.Count) + { + _cachedBuffer.Clear(); + _cachedBufferPos = 0; + } + } + + // 如果 cache 不够需要,那么继续读取 + while (bytesWritten < requiredSize) + { + var decodedBytes = DecodeOnce(); + if (decodedBytes == null) + { + break; + } + + if (decodedBytes.IsEmpty()) + { + continue; + } + + var bytesNeeded = requiredSize - bytesWritten; + var bytesSupply = (int)decodedBytes.Length; + var sizeToCache = bytesSupply - bytesNeeded; + if (sizeToCache > 0) + { + // 写到输出缓冲区 + Buffer.BlockCopy(decodedBytes, 0, buf, bytesWritten, bytesNeeded); + + // 剩下的存入 cache + _cachedBuffer.AddRange(Enumerable.Repeat(0, sizeToCache)); + _cachedBufferPos = 0; + Buffer.BlockCopy(decodedBytes, bytesNeeded, _cachedBuffer.GetBuffer(), 0, sizeToCache); + + bytesWritten = requiredSize; + } + else + { + // 全部写到输出缓冲区 + Buffer.BlockCopy(decodedBytes, 0, buf, bytesWritten, bytesSupply); + bytesWritten += bytesSupply; + } + } + + return bytesWritten; + } + + protected abstract byte[]? DecodeOnce(); + + // 内部缓冲区相关 + protected List _cachedBuffer; // 缓冲区 + protected int _cachedBufferPos; // 缓冲区读取位置 + } + + private unsafe class FileDecoderStream : CacheableStream, IAudioStream { public int SamplingRate => _codecContext != null ? _codecContext->sample_rate : 0; public int ChannelCount => _codecContext != null ? 
_codecContext->ch_layout.nb_channels : 0; @@ -149,7 +295,6 @@ public FileDecoderStream(string fileName) } catch (Exception e) { - Console.WriteLine(e); CloseFile(); throw; } @@ -179,47 +324,59 @@ public void Read(float[] buffer, int offset, int count) } // 将解码的字节流转为浮点 - switch (_format) + fixed (float* dest = &buffer[offset]) { - case AVSampleFormat.AV_SAMPLE_FMT_U8: - case AVSampleFormat.AV_SAMPLE_FMT_U8P: - { - Utils.BytesToFloats(buffer, offset, bytes, bytesRead, channels); - break; - } - case AVSampleFormat.AV_SAMPLE_FMT_S16: - case AVSampleFormat.AV_SAMPLE_FMT_S16P: - { - Utils.BytesToFloats(buffer, offset, bytes, bytesRead, channels); - break; - } - case AVSampleFormat.AV_SAMPLE_FMT_S32: - case AVSampleFormat.AV_SAMPLE_FMT_S32P: - { - Utils.BytesToFloats(buffer, offset, bytes, bytesRead, channels); - break; - } - case AVSampleFormat.AV_SAMPLE_FMT_FLT: - case AVSampleFormat.AV_SAMPLE_FMT_FLTP: + fixed (byte* src = bytes) { - Utils.BytesToFloats(buffer, offset, bytes, bytesRead, channels); - break; - } - case AVSampleFormat.AV_SAMPLE_FMT_DBL: - case AVSampleFormat.AV_SAMPLE_FMT_DBLP: - { - Utils.BytesToFloats(buffer, offset, bytes, bytesRead, channels); - break; - } - case AVSampleFormat.AV_SAMPLE_FMT_S64: - case AVSampleFormat.AV_SAMPLE_FMT_S64P: - { - Utils.BytesToFloats(buffer, offset, bytes, bytesRead, channels); - break; + switch (_format) + { + case AVSampleFormat.AV_SAMPLE_FMT_U8: + case AVSampleFormat.AV_SAMPLE_FMT_U8P: + { + Utils.BytesToFloats(dest, src, bytesRead, channels); + break; + } + case AVSampleFormat.AV_SAMPLE_FMT_S16: + case AVSampleFormat.AV_SAMPLE_FMT_S16P: + { + Utils.BytesToFloats(dest, src, bytesRead, channels); + break; + } + case AVSampleFormat.AV_SAMPLE_FMT_S32: + case AVSampleFormat.AV_SAMPLE_FMT_S32P: + { + Utils.BytesToFloats(dest, src, bytesRead, channels); + break; + } + case AVSampleFormat.AV_SAMPLE_FMT_FLT: + case AVSampleFormat.AV_SAMPLE_FMT_FLTP: + { + Utils.BytesToFloats(dest, src, bytesRead, channels); + break; + } + case AVSampleFormat.AV_SAMPLE_FMT_DBL: + case AVSampleFormat.AV_SAMPLE_FMT_DBLP: + { + Utils.BytesToFloats(dest, src, bytesRead, channels); + break; + } + case AVSampleFormat.AV_SAMPLE_FMT_S64: + case AVSampleFormat.AV_SAMPLE_FMT_S64P: + { + Utils.BytesToFloats(dest, src, bytesRead, channels); + break; + } + } } } } + public double Duration() + { + var stream = _formatContext->streams[_audioIndex]; + return stream->duration * stream->time_base.num / (double)stream->time_base.den; + } + // 文件信息 private string _fileName; @@ -234,10 +391,6 @@ public void Read(float[] buffer, int offset, int count) private long _samples; // 不包括声道 private AVSampleFormat _format; - // 内部缓冲区相关 - List _cachedBuffer; // 缓冲区 - int _cachedBufferPos; // 缓冲区读取位置 - // 打开音频 private void OpenFile(string fileName) { @@ -298,10 +451,10 @@ private void OpenFile(string fileName) } _samples = (long)(stream->duration * codec_ctx->sample_rate * - stream->time_base.num / (float)stream->time_base.den + stream->time_base.num / (double)stream->time_base.den ); _format = (AVSampleFormat)codec_param->format; - + // 初始化数据包和数据帧 var pkt = ffmpeg.av_packet_alloc(); var frame = ffmpeg.av_frame_alloc(); @@ -356,7 +509,7 @@ private void CloseFile() _fileName = string.Empty; } - private int Decode(byte[] buf, int requiredSize) + protected override byte[]? 
DecodeOnce() { var fmt_ctx = _formatContext; var codec_ctx = _codecContext; @@ -364,150 +517,116 @@ private int Decode(byte[] buf, int requiredSize) var pkt = _packet; var frame = _frame; - // 采取边解码边写到输出缓冲区的方式。策略是先把 cache 全部写出,然后边解码边写,写到最后剩下的再存入 cache - int bytesWritten = 0; + var ret = ffmpeg.av_read_frame(fmt_ctx, pkt); + + // 判断是否结束 + if (ret == ffmpeg.AVERROR_EOF) { - var cacheSize = Math.Min(_cachedBuffer.Count - _cachedBufferPos, requiredSize); - if (cacheSize > 0) - { - Buffer.BlockCopy(_cachedBuffer.GetBuffer(), _cachedBufferPos, buf, 0, cacheSize); - _cachedBufferPos += cacheSize; - bytesWritten = cacheSize; - } + ffmpeg.av_packet_unref(pkt); + return null; + } - // 如果 cache 用完了,那么清除 cache - if (_cachedBufferPos == _cachedBuffer.Count) - { - _cachedBuffer.Resize(0); - } + // 忽略其他错误 + if (ret != 0) + { + Console.WriteLine($"FFmpeg: Error getting next frame with code {-ret:x}, ignored."); + return Array.Empty(); } - // 如果 cache 不够需要,那么继续从音频中读取 - while (bytesWritten < requiredSize) + // 跳过其他流 + if (pkt->stream_index != _audioIndex) { - int ret = ffmpeg.av_read_frame(fmt_ctx, pkt); + ffmpeg.av_packet_unref(pkt); + return Array.Empty(); + } - // 判断是否结束 - if (ret == ffmpeg.AVERROR_EOF) - { - ffmpeg.av_packet_unref(pkt); - break; - } + // 发送待解码包 + ret = ffmpeg.avcodec_send_packet(codec_ctx, pkt); + ffmpeg.av_packet_unref(pkt); - if (ret != 0) - { - // 忽略 - Console.WriteLine($"FFmpeg: Error getting next frame with code {-ret:x}, ignored."); - continue; - } + // 忽略错误 + if (ret < 0) + { + Console.WriteLine($"FFmpeg: Error submitting a packet for decoding with code {-ret:x}, ignored."); + return Array.Empty(); + } + + // 读取当前包的所有内容 + var buffer = new List(); + while (ret >= 0) + { + // 接收解码数据 + ret = ffmpeg.avcodec_receive_frame(codec_ctx, frame); - // 跳过其他流 - if (pkt->stream_index != _audioIndex) + // 判断是否结束 + if (ret == ffmpeg.AVERROR_EOF || ret == ffmpeg.AVERROR(ffmpeg.EAGAIN)) { - ffmpeg.av_packet_unref(pkt); - continue; + break; } - // 发送待解码包 - ret = ffmpeg.avcodec_send_packet(codec_ctx, pkt); - ffmpeg.av_packet_unref(pkt); + // 忽略当前错误 if (ret < 0) { - // 忽略 - Console.WriteLine($"FFmpeg: Error submitting a packet for decoding with code {-ret:x}, ignored."); + ffmpeg.av_frame_unref(frame); + Console.WriteLine($"FFmpeg: Error decoding frame with code {-ret:x}, ignored."); continue; } - while (ret >= 0) - { - // 接收解码数据 - ret = ffmpeg.avcodec_receive_frame(codec_ctx, frame); - if (ret == ffmpeg.AVERROR_EOF || ret == ffmpeg.AVERROR(ffmpeg.EAGAIN)) - { - // 结束 - break; - } - - if (ret < 0) - { - // 出错 - ffmpeg.av_frame_unref(frame); - - // 忽略 - Console.WriteLine($"FFmpeg: Error decoding frame with code {-ret:x}, ignored."); - continue; - } - - var sampleFormat = (AVSampleFormat)frame->format; - var sampleCount = frame->nb_samples; - var channelCount = frame->ch_layout.nb_channels; + // 接收数据 + var sampleFormat = (AVSampleFormat)frame->format; + var sampleCount = frame->nb_samples; + var channelCount = frame->ch_layout.nb_channels; + var bytesSupply = ffmpeg.av_samples_get_buffer_size(null, channelCount, + sampleCount, sampleFormat, 1); - var bytesNeeded = requiredSize - bytesWritten; - var bytesSupply = ffmpeg.av_samples_get_buffer_size(null, channelCount, - sampleCount, sampleFormat, 1); - - var nonPlainBuffer = new byte[bytesSupply]; - if (ffmpeg.av_sample_fmt_is_planar(sampleFormat) != 0) + var nonPlainBuffer = new byte[bytesSupply]; + if (ffmpeg.av_sample_fmt_is_planar(sampleFormat) != 0) + { + // 平面格式 + var bytesPerSample = ffmpeg.av_get_bytes_per_sample(sampleFormat); + for (var i = 0; i 
< sampleCount; ++i) { - // 平面格式 - var bytesPerSample = ffmpeg.av_get_bytes_per_sample(sampleFormat); - for (var i = 0; i < sampleCount; ++i) + for (var j = 0; j < channelCount; ++j) { - for (var j = 0; j < channelCount; ++j) - { - var src = frame->extended_data[j] + i * bytesPerSample; - var dstIdx = (i * channelCount + j) * bytesPerSample; - Marshal.Copy((IntPtr)src, nonPlainBuffer, dstIdx, bytesPerSample); - } + var src = frame->extended_data[j] + i * bytesPerSample; + var dstIdx = (i * channelCount + j) * bytesPerSample; + Marshal.Copy((IntPtr)src, nonPlainBuffer, dstIdx, bytesPerSample); } } - else - { - // 交织格式 - Marshal.Copy((IntPtr)frame->data[0], nonPlainBuffer, 0, bytesSupply); - } - - var sizeToCache = bytesSupply - bytesNeeded; - if (sizeToCache > 0) - { - // 写到输出缓冲区 - Buffer.BlockCopy(nonPlainBuffer, 0, buf, bytesWritten, bytesNeeded); - - // 剩下的存入 cache - _cachedBuffer.Resize(sizeToCache); - Buffer.BlockCopy(nonPlainBuffer, bytesNeeded, _cachedBuffer.GetBuffer(), 0, sizeToCache); - _cachedBufferPos = 0; + } + else + { + // 交织格式 + Marshal.Copy((IntPtr)frame->data[0], nonPlainBuffer, 0, bytesSupply); + } - bytesWritten = requiredSize; - } - else - { - // 全部写到输出缓冲区 - Buffer.BlockCopy(nonPlainBuffer, 0, buf, bytesWritten, bytesSupply); - bytesWritten += bytesSupply; - } + buffer.AddRange(nonPlainBuffer); - ffmpeg.av_frame_unref(frame); - } + ffmpeg.av_frame_unref(frame); } - return bytesWritten; + return buffer.ToArray(); } } - private unsafe class ResampledAudioStream : IAudioStream + private unsafe class ResampledAudioStream : CacheableStream, IAudioStream { public int SamplingRate { get; } public int ChannelCount { get; } public int SamplesPerChannel { get; } + public int InputSamplingRate { get; } + public ResampledAudioStream(IAudioProvider input, int sampleRate) { SamplingRate = sampleRate; ChannelCount = input.ChannelCount; SamplesPerChannel = (int)((long)input.SamplesPerChannel * sampleRate / input.SamplingRate); + InputSamplingRate = input.SamplingRate; _provider = input; + _cachedBuffer = new List(); + _cachedBufferPos = 0; try { @@ -523,16 +642,28 @@ public ResampledAudioStream(IAudioProvider input, int sampleRate) public void Read(float[] buffer, int offset, int count) { var channels = ChannelCount; - var dstSamplesPerChannel = count / channels; - var srcSamplesPerChannel = ffmpeg.av_rescale_rnd(ffmpeg.swr_get_delay(_swrContext, _provider.SamplingRate) + - dstSamplesPerChannel, _provider.SamplingRate, SamplingRate, - AVRounding.AV_ROUND_UP); + // 解码 + var bytesRequired = count * sizeof(float); + var bytes = new byte[bytesRequired]; + var bytesRead = Decode(bytes, bytesRequired); + if (bytesRead == 0) + { + return; + } + + fixed (float* dest = &buffer[offset]) + { + fixed (byte* src = bytes) + { + Utils.BytesToFloats(dest, src, bytesRead, channels); + } + } } public void Dispose() { - if (_swrContext != null) + if (_swr_ctx != null) { CloseResampler(); } @@ -541,13 +672,24 @@ public void Dispose() private readonly IAudioProvider _provider; // FFmpeg 数据 - private SwrContext* _swrContext; - + private SwrContext* _swr_ctx; + private byte** _src_data; + private byte** _dst_data; + private int _src_linesize; + private int _dst_linesize; + private long _max_dst_nb_samples; + private long _dst_nb_samples; + + private const int src_nb_samples = 1024; + private const AVSampleFormat src_sample_fmt = AVSampleFormat.AV_SAMPLE_FMT_FLT; + private const AVSampleFormat dst_sample_fmt = AVSampleFormat.AV_SAMPLE_FMT_FLT; + + // 
https://github.com/FFmpeg/FFmpeg/blob/master/doc/examples/resample_audio.c private void OpenResampler() { // 初始化重采样器 var swr = ffmpeg.swr_alloc(); - _swrContext = swr; + _swr_ctx = swr; // 初始化输入输出声道 int ret; @@ -557,7 +699,7 @@ private void OpenResampler() ret = ffmpeg.swr_alloc_set_opts2(&swr, &chLayout, AVSampleFormat.AV_SAMPLE_FMT_FLT, SamplingRate, &chLayout, - AVSampleFormat.AV_SAMPLE_FMT_FLT, _provider.SamplingRate, 0, null); + AVSampleFormat.AV_SAMPLE_FMT_FLT, InputSamplingRate, 0, null); ffmpeg.av_channel_layout_uninit(&chLayout); } @@ -571,17 +713,120 @@ private void OpenResampler() { throw new DecoderFallbackException("FFmpeg: Failed to init resampler."); } + + var channels = ChannelCount; + var src_data = _src_data; + int src_linesize; + + /* allocate source and destination samples buffers */ + ret = ffmpeg.av_samples_alloc_array_and_samples(&src_data, &src_linesize, channels, + src_nb_samples, src_sample_fmt, 0); + if (ret < 0) + { + throw new DecoderFallbackException("FFmpeg: Could not allocate source samples."); + } + + _src_data = src_data; + _src_linesize = src_linesize; + + /* compute the number of converted samples: buffering is avoided + * ensuring that the output buffer will contain at least all the + * converted input samples */ + _max_dst_nb_samples = _dst_nb_samples = + ffmpeg.av_rescale_rnd(src_nb_samples, SamplingRate, InputSamplingRate, AVRounding.AV_ROUND_UP); + + /* buffer is going to be directly written to a rawaudio file, no alignment */ + var dst_data = _dst_data; + int dst_linesize; + ret = ffmpeg.av_samples_alloc_array_and_samples(&dst_data, &dst_linesize, channels, + (int)_dst_nb_samples, dst_sample_fmt, 0); + if (ret < 0) + { + ffmpeg.av_freep(&src_data[0]); + ffmpeg.av_freep(&src_data); + throw new DecoderFallbackException("FFmpeg: Could not allocate destination samples."); + } + + _dst_data = dst_data; + _dst_linesize = dst_linesize; } private void CloseResampler() { - var swr_ctx = _swrContext; + var src_data = _src_data; + if (src_data != null) + ffmpeg.av_freep(&src_data[0]); + ffmpeg.av_freep(&src_data); + _src_data = null; + + var dst_data = _dst_data; + if (dst_data != null) + ffmpeg.av_freep(&dst_data[0]); + ffmpeg.av_freep(&dst_data); + _dst_data = null; + + var swr_ctx = _swr_ctx; if (swr_ctx != null) { ffmpeg.swr_close(swr_ctx); ffmpeg.swr_free(&swr_ctx); } - _swrContext = null; + + _swr_ctx = null; + } + + protected override byte[]? 
DecodeOnce() + { + var channels = ChannelCount; + var swr_ctx = _swr_ctx; + var dst_linesize = _dst_linesize; + int ret; + + /* generate synthetic audio */ + var srcBuffer = new float[src_nb_samples * channels]; + _provider.Read(srcBuffer, 0, srcBuffer.Length); + fixed (float* srcBufferPtr = srcBuffer) + { + var srcByteSize = srcBuffer.Length * sizeof(float); + Buffer.MemoryCopy(srcBufferPtr, _src_data[0], srcByteSize, srcByteSize); + } + + /* compute destination number of samples */ + _dst_nb_samples = ffmpeg.av_rescale_rnd(ffmpeg.swr_get_delay(swr_ctx, InputSamplingRate) + + src_nb_samples, SamplingRate, InputSamplingRate, + AVRounding.AV_ROUND_UP); + if (_dst_nb_samples > _max_dst_nb_samples) + { + ffmpeg.av_freep(&_dst_data[0]); + ret = ffmpeg.av_samples_alloc(_dst_data, &dst_linesize, channels, + (int)_dst_nb_samples, dst_sample_fmt, 1); + + if (ret < 0) + { + return null; + } + + _max_dst_nb_samples = _dst_nb_samples; + } + + /* convert to destination format */ + ret = ffmpeg.swr_convert(swr_ctx, _dst_data, (int)_dst_nb_samples, _src_data, src_nb_samples); + if (ret < 0) + { + throw new DecoderFallbackException("FFmpeg: Error while converting."); + } + + var dst_bufsize = ffmpeg.av_samples_get_buffer_size(&dst_linesize, channels, + ret, dst_sample_fmt, 1); + + var buffer = new byte[dst_bufsize]; + fixed (byte* bufferPtr = buffer) + { + Buffer.MemoryCopy(_dst_data[0], bufferPtr, dst_bufsize, dst_bufsize); + } + + _dst_linesize = dst_linesize; + return buffer; } } } \ No newline at end of file From 2eea2299ce3f025c6aa8acab01598ad840f53d49 Mon Sep 17 00:00:00 2001 From: Sine Striker Date: Sat, 15 Jun 2024 19:30:09 +0800 Subject: [PATCH 4/8] Fix format free mistake --- TuneLab/Audio/FFmpeg/FFmpegCodec.cs | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/TuneLab/Audio/FFmpeg/FFmpegCodec.cs b/TuneLab/Audio/FFmpeg/FFmpegCodec.cs index 89d4deab..bd5cfca9 100644 --- a/TuneLab/Audio/FFmpeg/FFmpegCodec.cs +++ b/TuneLab/Audio/FFmpeg/FFmpegCodec.cs @@ -154,7 +154,7 @@ public static unsafe void FloatsToBytes(byte* dest, float* src, int size, int internal class FFmpegCodec : IAudioCodec { public IEnumerable AllDecodableFormats { get; } = - ["wav", "mp3", "aiff", "aac", "wma", "mp4", "m4a", "flac"]; + ["wav", "mp3", "aac", "aiff", "m4a", "flac", "wma", "ogg", "opus"]; public FFmpegCodec(string libraryDir) { @@ -229,7 +229,7 @@ protected int Decode(byte[] buf, int requiredSize) { _cachedBuffer.Clear(); _cachedBufferPos = 0; - } + } } // 如果 cache 不够需要,那么继续读取 @@ -396,16 +396,16 @@ private void OpenFile(string fileName) { _fileName = fileName; - var fmt_ctx = ffmpeg.avformat_alloc_context(); - _formatContext = fmt_ctx; - // 打开文件 + AVFormatContext* fmt_ctx = null; var ret = ffmpeg.avformat_open_input(&fmt_ctx, fileName, null, null); if (ret != 0) { throw new FileLoadException($"FFmpeg: Failed to load file {fileName}.", fileName); } + _formatContext = fmt_ctx; + // 查找流信息 ret = ffmpeg.avformat_find_stream_info(fmt_ctx, null); if (ret < 0) @@ -450,9 +450,9 @@ private void OpenFile(string fileName) throw new DecoderFallbackException("FFmpeg: Failed to open decoder."); } - _samples = (long)(stream->duration * codec_ctx->sample_rate * + _samples = (long)Math.Round((stream->duration * codec_ctx->sample_rate * stream->time_base.num / (double)stream->time_base.den - ); + )); _format = (AVSampleFormat)codec_param->format; // 初始化数据包和数据帧 From 92c5d273b0ccb5d7f18af3098d280f98711b6fe9 Mon Sep 17 00:00:00 2001 From: Sine Striker Date: Fri, 19 Jul 2024 17:48:23 +0800 Subject: [PATCH 5/8] 
Rename Class Names --- TuneLab/Audio/FFmpeg/FFmpegCodec.cs | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/TuneLab/Audio/FFmpeg/FFmpegCodec.cs b/TuneLab/Audio/FFmpeg/FFmpegCodec.cs index bd5cfca9..e4942502 100644 --- a/TuneLab/Audio/FFmpeg/FFmpegCodec.cs +++ b/TuneLab/Audio/FFmpeg/FFmpegCodec.cs @@ -202,15 +202,15 @@ public IAudioStream Resample(IAudioProvider input, int outputSamplingRate) // return new NAudioCodec.NAudioResamplerStream(input, outputSamplingRate); } - private abstract class CacheableStream + private abstract class FIFOStream { - public CacheableStream() + public FIFOStream() { - _cachedBuffer = new List(); + _cachedBuffer = new List(); _cachedBufferPos = 0; } - protected int Decode(byte[] buf, int requiredSize) + protected int Decode(T[] buf, int requiredSize) { // 采取边解码边写到输出缓冲区的方式。策略是先把 cache 全部写出,然后边解码边写,写到最后剩下的再存入 cache int bytesWritten = 0; @@ -241,6 +241,7 @@ protected int Decode(byte[] buf, int requiredSize) break; } + // 本次没读到任何内容 if (decodedBytes.IsEmpty()) { continue; @@ -255,7 +256,7 @@ protected int Decode(byte[] buf, int requiredSize) Buffer.BlockCopy(decodedBytes, 0, buf, bytesWritten, bytesNeeded); // 剩下的存入 cache - _cachedBuffer.AddRange(Enumerable.Repeat(0, sizeToCache)); + _cachedBuffer.AddRange(Enumerable.Repeat(default!, sizeToCache)); _cachedBufferPos = 0; Buffer.BlockCopy(decodedBytes, bytesNeeded, _cachedBuffer.GetBuffer(), 0, sizeToCache); @@ -275,11 +276,11 @@ protected int Decode(byte[] buf, int requiredSize) protected abstract byte[]? DecodeOnce(); // 内部缓冲区相关 - protected List _cachedBuffer; // 缓冲区 + protected List _cachedBuffer; // 缓冲区 protected int _cachedBufferPos; // 缓冲区读取位置 } - private unsafe class FileDecoderStream : CacheableStream, IAudioStream + private unsafe class FileDecoderStream : FIFOStream, IAudioStream { public int SamplingRate => _codecContext != null ? _codecContext->sample_rate : 0; public int ChannelCount => _codecContext != null ? 
_codecContext->ch_layout.nb_channels : 0; @@ -609,7 +610,7 @@ private void CloseFile() } } - private unsafe class ResampledAudioStream : CacheableStream, IAudioStream + private unsafe class ResampledAudioStream : FIFOStream, IAudioStream { public int SamplingRate { get; } public int ChannelCount { get; } From 460c2c7fe9cd329f27f49df7c8495766a69dc817 Mon Sep 17 00:00:00 2001 From: SineStriker <55847490+SineStriker@users.noreply.github.com> Date: Fri, 14 Feb 2025 20:32:31 +0800 Subject: [PATCH 6/8] Update --- TuneLab/App.axaml.cs | 6 +++++- TuneLab/Audio/FFmpeg/FFmpegCodec.cs | 32 ++++++++++++++--------------- TuneLab/Audio/NAudio/NAudioCodec.cs | 2 +- TuneLab/TuneLab.csproj | 1 + 4 files changed, 23 insertions(+), 18 deletions(-) diff --git a/TuneLab/App.axaml.cs b/TuneLab/App.axaml.cs index 4a4d5522..85aaa1a4 100644 --- a/TuneLab/App.axaml.cs +++ b/TuneLab/App.axaml.cs @@ -21,6 +21,8 @@ using System.Threading.Tasks; using System.Threading; using System.IO.Pipes; +using System.Reflection; +using TuneLab.Audio.FFmpeg; namespace TuneLab; @@ -48,7 +50,9 @@ public override void OnFrameworkInitializationCompleted() }; // init audio engine - AudioUtils.Init(new NAudioCodec()); + // AudioUtils.Init(new NAudioCodec()); + AudioUtils.Init(new FFmpegCodec(Path.Combine( + Path.GetDirectoryName(Assembly.GetExecutingAssembly().Location)!, "ffmpeg"))); AudioEngine.SampleRate.Value = Settings.SampleRate; AudioEngine.BufferSize.Value = Settings.BufferSize; if (!string.IsNullOrEmpty(Settings.AudioDriver)) AudioEngine.CurrentDriver.Value = Settings.AudioDriver; diff --git a/TuneLab/Audio/FFmpeg/FFmpegCodec.cs b/TuneLab/Audio/FFmpeg/FFmpegCodec.cs index e4942502..5b3f9d0c 100644 --- a/TuneLab/Audio/FFmpeg/FFmpegCodec.cs +++ b/TuneLab/Audio/FFmpeg/FFmpegCodec.cs @@ -188,18 +188,18 @@ public IAudioStream Decode(string path) return new FileDecoderStream(path); } - public void EncodeToWav(string path, float[] buffer, int samplingRate, int bitPerSample, int channelCount) + public void EncodeToWav(string path, float[] buffer, int SampleRate, int bitPerSample, int channelCount) { - WaveFormat waveFormat = new WaveFormat(samplingRate, 16, channelCount); + WaveFormat waveFormat = new WaveFormat(SampleRate, 16, channelCount); using WaveFileWriter writer = new WaveFileWriter(path, waveFormat); var bytes = NAudioCodec.To16BitsBytes(buffer); writer.Write(bytes, 0, bytes.Length); } - public IAudioStream Resample(IAudioProvider input, int outputSamplingRate) + public IAudioStream Resample(IAudioProvider input, int outputSampleRate) { - return new ResampledAudioStream(input, outputSamplingRate); - // return new NAudioCodec.NAudioResamplerStream(input, outputSamplingRate); + return new ResampledAudioStream(input, outputSampleRate); + // return new NAudioCodec.NAudioResamplerStream(input, outputSampleRate); } private abstract class FIFOStream @@ -282,7 +282,7 @@ protected int Decode(T[] buf, int requiredSize) private unsafe class FileDecoderStream : FIFOStream, IAudioStream { - public int SamplingRate => _codecContext != null ? _codecContext->sample_rate : 0; + public int SampleRate => _codecContext != null ? _codecContext->sample_rate : 0; public int ChannelCount => _codecContext != null ? 
_codecContext->ch_layout.nb_channels : 0; public int SamplesPerChannel => (int)_samples; @@ -612,18 +612,18 @@ private void CloseFile() private unsafe class ResampledAudioStream : FIFOStream, IAudioStream { - public int SamplingRate { get; } + public int SampleRate { get; } public int ChannelCount { get; } public int SamplesPerChannel { get; } - public int InputSamplingRate { get; } + public int InputSampleRate { get; } public ResampledAudioStream(IAudioProvider input, int sampleRate) { - SamplingRate = sampleRate; + SampleRate = sampleRate; ChannelCount = input.ChannelCount; - SamplesPerChannel = (int)((long)input.SamplesPerChannel * sampleRate / input.SamplingRate); - InputSamplingRate = input.SamplingRate; + SamplesPerChannel = (int)((long)input.SamplesPerChannel * sampleRate / input.SampleRate); + InputSampleRate = input.SampleRate; _provider = input; _cachedBuffer = new List(); @@ -699,8 +699,8 @@ private void OpenResampler() ffmpeg.av_channel_layout_default(&chLayout, ChannelCount); ret = ffmpeg.swr_alloc_set_opts2(&swr, &chLayout, AVSampleFormat.AV_SAMPLE_FMT_FLT, - SamplingRate, &chLayout, - AVSampleFormat.AV_SAMPLE_FMT_FLT, InputSamplingRate, 0, null); + SampleRate, &chLayout, + AVSampleFormat.AV_SAMPLE_FMT_FLT, InputSampleRate, 0, null); ffmpeg.av_channel_layout_uninit(&chLayout); } @@ -734,7 +734,7 @@ private void OpenResampler() * ensuring that the output buffer will contain at least all the * converted input samples */ _max_dst_nb_samples = _dst_nb_samples = - ffmpeg.av_rescale_rnd(src_nb_samples, SamplingRate, InputSamplingRate, AVRounding.AV_ROUND_UP); + ffmpeg.av_rescale_rnd(src_nb_samples, SampleRate, InputSampleRate, AVRounding.AV_ROUND_UP); /* buffer is going to be directly written to a rawaudio file, no alignment */ var dst_data = _dst_data; @@ -793,8 +793,8 @@ private void CloseResampler() } /* compute destination number of samples */ - _dst_nb_samples = ffmpeg.av_rescale_rnd(ffmpeg.swr_get_delay(swr_ctx, InputSamplingRate) + - src_nb_samples, SamplingRate, InputSamplingRate, + _dst_nb_samples = ffmpeg.av_rescale_rnd(ffmpeg.swr_get_delay(swr_ctx, InputSampleRate) + + src_nb_samples, SampleRate, InputSampleRate, AVRounding.AV_ROUND_UP); if (_dst_nb_samples > _max_dst_nb_samples) { diff --git a/TuneLab/Audio/NAudio/NAudioCodec.cs b/TuneLab/Audio/NAudio/NAudioCodec.cs index 2694a4cd..177ed02a 100644 --- a/TuneLab/Audio/NAudio/NAudioCodec.cs +++ b/TuneLab/Audio/NAudio/NAudioCodec.cs @@ -30,7 +30,7 @@ public AudioInfo GetAudioInfo(string path) return new AudioInfo() { duration = reader.TotalTime.TotalSeconds }; } - static byte[] To16BitsBytes(float[] data) + public static byte[] To16BitsBytes(float[] data) { byte[] results = new byte[data.Length * 2]; for (int i = 0; i < data.Length; i++) diff --git a/TuneLab/TuneLab.csproj b/TuneLab/TuneLab.csproj index 8bf75efc..0dc090c3 100644 --- a/TuneLab/TuneLab.csproj +++ b/TuneLab/TuneLab.csproj @@ -41,6 +41,7 @@ + From 1c6ae67fe6d79281625543b4fc648206b2ce9003 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=87=91=E5=88=9A?= <1219318552@qq.com> Date: Sat, 15 Feb 2025 03:36:25 +0800 Subject: [PATCH 7/8] feat: add ffmpeg-fake vcpkg --- .gitignore | 4 +++ TuneLab/TuneLab.csproj | 33 +++++++++++++++++++++++ TuneLab/scripts/vcpkg-manifest/vcpkg.json | 14 ++++++++++ 3 files changed, 51 insertions(+) create mode 100644 TuneLab/scripts/vcpkg-manifest/vcpkg.json diff --git a/.gitignore b/.gitignore index 409d0016..e9f34942 100644 --- a/.gitignore +++ b/.gitignore @@ -455,3 +455,7 @@ $RECYCLE.BIN/ # Extensions Projects /[Ee]xtensions/ + +# vcpkg 
+/TuneLab/vcpkg/ +/TuneLab/scripts/vcpkg-overlay/ diff --git a/TuneLab/TuneLab.csproj b/TuneLab/TuneLab.csproj index 0dc090c3..fafcedee 100644 --- a/TuneLab/TuneLab.csproj +++ b/TuneLab/TuneLab.csproj @@ -10,6 +10,15 @@ latest 1.5.10 true + $(VCPKG_ROOT) + $(VCPKG_ROOT) + $(MSBuildThisFileDirectory)vcpkg + $(MSBuildThisFileDirectory)scripts\ + $(ScriptsPath)vcpkg-overlay\ + + x64-windows + x64-osx + x64-linux @@ -56,4 +65,28 @@ Always + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/TuneLab/scripts/vcpkg-manifest/vcpkg.json b/TuneLab/scripts/vcpkg-manifest/vcpkg.json new file mode 100644 index 00000000..f446704e --- /dev/null +++ b/TuneLab/scripts/vcpkg-manifest/vcpkg.json @@ -0,0 +1,14 @@ +{ + "$schema": "https://raw.githubusercontent.com/microsoft/vcpkg-tool/main/docs/vcpkg.schema.json", + "dependencies": [ + "ffmpeg-fake" + ], + "vcpkg-configuration": { + "overlay-ports": [ + "../vcpkg-overlay/ports" + ], + "overlay-triplets": [ + "../vcpkg-overlay/triplets" + ] + } +} From ffaeaa3f1dea09555ac9be5d9b2970f80e6e302f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=87=91=E5=88=9A?= <1219318552@qq.com> Date: Sun, 16 Feb 2025 20:37:27 +0800 Subject: [PATCH 8/8] =?UTF-8?q?[feat]=E4=B8=8D=E4=BD=BF=E7=94=A8=E5=B7=B2?= =?UTF-8?q?=E5=AE=89=E8=A3=85=E7=9A=84vcpkg=E8=B7=AF=E5=BE=84?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- TuneLab/TuneLab.csproj | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/TuneLab/TuneLab.csproj b/TuneLab/TuneLab.csproj index fafcedee..9ef239bc 100644 --- a/TuneLab/TuneLab.csproj +++ b/TuneLab/TuneLab.csproj @@ -10,9 +10,7 @@ latest 1.5.10 true - $(VCPKG_ROOT) - $(VCPKG_ROOT) - $(MSBuildThisFileDirectory)vcpkg + $(MSBuildThisFileDirectory)vcpkg\ $(MSBuildThisFileDirectory)scripts\ $(ScriptsPath)vcpkg-overlay\
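
For reference, a minimal sketch of how the codec added in this series can be driven through the IAudioCodec/IAudioStream surface shown in the diffs. The file names, the "ffmpeg" library directory and the single whole-file Read() call are illustrative placeholders (not code from the repository), and IAudioStream is assumed to be disposable as its Dispose() implementations suggest:

    using System;
    using System.IO;
    using TuneLab.Audio.FFmpeg;

    // Point the codec at the directory containing the FFmpeg shared libraries;
    // the constructor added in PATCH 2 checks that avcodec/avutil/avformat/swresample exist there.
    var codec = new FFmpegCodec(Path.Combine(AppContext.BaseDirectory, "ffmpeg"));

    // Decode a file into interleaved float samples, then write them back out as 16-bit WAV.
    using (var stream = codec.Decode("input.m4a"))          // "input.m4a" is a placeholder path
    {
        var samples = new float[stream.SamplesPerChannel * stream.ChannelCount];
        stream.Read(samples, 0, samples.Length);            // count is in samples, not frames
        codec.EncodeToWav("output.wav", samples, stream.SampleRate, 16, stream.ChannelCount);
    }

Both FileDecoderStream and ResampledAudioStream get this Read() behaviour from the FIFOStream base class introduced above: a call first drains bytes cached from the previous frame, then repeatedly invokes DecodeOnce() until the requested byte count is satisfied, and parks whatever the last frame over-delivered back into the cache for the next call.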