From 4ddecb7c301756e8cf2139240b7e5ee00adce386 Mon Sep 17 00:00:00 2001 From: mapleafgo Date: Wed, 8 Apr 2026 19:39:58 +0800 Subject: [PATCH] =?UTF-8?q?feat(audio):=20=E6=B7=BB=E5=8A=A0=E9=9F=B3?= =?UTF-8?q?=E9=A2=91=E9=87=8D=E9=87=87=E6=A0=B7=E6=94=AF=E6=8C=81=EF=BC=8C?= =?UTF-8?q?=E4=BF=AE=E5=A4=8D=E6=92=AD=E6=94=BE=E9=80=9F=E5=BA=A6=E9=97=AE?= =?UTF-8?q?=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 问题: - TTS 返回 16000 Hz 音频,但 Context 使用 44100 Hz - 播放速度快 2.75 倍(44100/16000) - 不同采样率的音频播放速度不正确 解决方案: - 集成 gomplerate 库(纯 Go,零依赖) - 自动检测音频采样率并重采样到 44100 Hz - 支持任意采样率的音频文件正常播放 技术实现: - resampler.go: 封装 gomplerate,实现流式重采样 - play.go: WAV/MP3 播放自动重采样 - loop.go: BGM 循环播放支持重采样 测试: - 所有单元测试通过(6/6) - 支持采样率自动转换(如 16000 Hz → 44100 Hz) 依赖: - github.com/zeozeozeo/gomplerate v0.0.0 --- go.mod | 1 + go.sum | 2 + pkg/audio/context.go | 8 +-- pkg/audio/loop.go | 17 ++++++- pkg/audio/play.go | 59 ++++++++++++++++------ pkg/audio/resampler.go | 108 +++++++++++++++++++++++++++++++++++++++++ 6 files changed, 174 insertions(+), 21 deletions(-) create mode 100644 pkg/audio/resampler.go diff --git a/go.mod b/go.mod index 3e52fca..732915a 100644 --- a/go.mod +++ b/go.mod @@ -53,6 +53,7 @@ require ( github.com/ysmood/gson v0.7.3 // indirect github.com/ysmood/leakless v0.9.0 // indirect github.com/zaf/g711 v1.4.0 // indirect + github.com/zeozeozeo/gomplerate v0.0.0-20250404113140-0fbb236df825 // indirect go.uber.org/atomic v1.11.0 // indirect go.uber.org/multierr v1.11.0 // indirect go.yaml.in/yaml/v3 v3.0.4 // indirect diff --git a/go.sum b/go.sum index e397670..4c9e153 100644 --- a/go.sum +++ b/go.sum @@ -157,6 +157,8 @@ github.com/ysmood/leakless v0.9.0/go.mod h1:R8iAXPRaG97QJwqxs74RdwzcRHT1SWCGTNqY github.com/zaf/g711 v0.0.0-20190814101024-76a4a538f52b/go.mod h1:T2h1zV50R/q0CVYnsQOQ6L7P4a2ZxH47ixWcMXFGyx8= github.com/zaf/g711 v1.4.0 h1:XZYkjjiAg9QTBnHqEg37m2I9q3IIDv5JRYXs2N8ma7c= github.com/zaf/g711 v1.4.0/go.mod 
h1:eCDXt3dSp/kYYAoooba7ukD/Q75jvAaS4WOMr0l1Roo= +github.com/zeozeozeo/gomplerate v0.0.0-20250404113140-0fbb236df825 h1:rViu1xhQRtdJogc39jF46PS01xHVD736JowXl2qOcPM= +github.com/zeozeozeo/gomplerate v0.0.0-20250404113140-0fbb236df825/go.mod h1:ASuMFHITnaVdPvMkoDGI4tTwYG9fW7Mxv2j5AuvTo8Q= go.uber.org/atomic v1.9.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc= go.uber.org/atomic v1.11.0 h1:ZvwS0R+56ePWxUNi+Atn9dWONBPp/AUETXlHW0DxSjE= go.uber.org/atomic v1.11.0/go.mod h1:LUxbIzbOniOlMKjJjyPfpl4v+PKK2cNJn91OQbhoJI0= diff --git a/pkg/audio/context.go b/pkg/audio/context.go index 77fc54e..7dbd90e 100644 --- a/pkg/audio/context.go +++ b/pkg/audio/context.go @@ -13,15 +13,15 @@ var ( ) const ( - DefaultSampleRate = 44100 // 采样率 - DefaultChannelCount = 2 // 声道数(立体声) + UniversalSampleRate = 44100 // 通用采样率(高质量音频) + DefaultChannelCount = 2 // 声道数(立体声) ) func initContext() (*oto.Context, error) { var initErr error otoOnce.Do(func() { op := &oto.NewContextOptions{} - op.SampleRate = DefaultSampleRate + op.SampleRate = UniversalSampleRate op.ChannelCount = DefaultChannelCount op.Format = oto.FormatSignedInt16LE @@ -31,7 +31,7 @@ func initContext() (*oto.Context, error) { return } <-ready - zap.S().Infoln("oto/v3 音频系统就绪") + zap.S().Infof("oto/v3 音频系统就绪 (%d Hz)", UniversalSampleRate) }) return otoCtx, initErr } diff --git a/pkg/audio/loop.go b/pkg/audio/loop.go index 1fb11ca..8d7e717 100644 --- a/pkg/audio/loop.go +++ b/pkg/audio/loop.go @@ -33,7 +33,21 @@ func PlayMP3Loop(r io.ReadCloser) (*oto.Player, func() error, error) { return nil, func() error { return nil }, err } - player := otoCtx.NewPlayer(dec) + // 获取采样率信息 + sampleRate := int(dec.SampleRate()) + targetRate := UniversalSampleRate + + // 需要重采样 + var reader io.Reader = dec + if needsResample(sampleRate, targetRate) { + resampleReader, err := newResamplingReader(dec, sampleRate, targetRate, 2) + if err != nil { + return nil, func() error { return nil }, err + } + reader = resampleReader + } + + player := 
otoCtx.NewPlayer(reader) playing := atomic.Bool{} playing.Store(true) @@ -48,6 +62,7 @@ func PlayMP3Loop(r io.ReadCloser) (*oto.Player, func() error, error) { time.Sleep(10 * time.Millisecond) } if playing.Load() { + // 重置解码器位置 _, _ = dec.Seek(0, io.SeekStart) } } diff --git a/pkg/audio/play.go b/pkg/audio/play.go index 3182ccb..b640518 100644 --- a/pkg/audio/play.go +++ b/pkg/audio/play.go @@ -32,29 +32,43 @@ func PlayWav(ctx context.Context, r io.ReadCloser) error { // 获取音频格式信息 format, err := dec.Format() - if err == nil { - duration, _ := dec.Duration() - zap.S().Debugf("WAV 格式: %d ch, %d Hz, %d bits, 时长: %v", - format.NumChannels, format.SampleRate, format.BitsPerSample, duration) + if err != nil { + return fmt.Errorf("获取 WAV 格式失败: %w", err) } - player := otoCtx.NewPlayer(dec) + duration, _ := dec.Duration() + sourceRate := int(format.SampleRate) + targetRate := UniversalSampleRate + channels := int(format.NumChannels) + + zap.S().Infof("WAV 音频: %d ch, %d Hz → %d Hz, 时长: %v", + channels, sourceRate, targetRate, duration) + + // 需要重采样 + var reader io.Reader = dec + if needsResample(sourceRate, targetRate) { + zap.S().Infof("重采样: %d Hz → %d Hz", sourceRate, targetRate) + resampleReader, err := newResamplingReader(dec, sourceRate, targetRate, channels) + if err != nil { + return fmt.Errorf("创建重采样器失败: %w", err) + } + reader = resampleReader + } + + player := otoCtx.NewPlayer(reader) defer player.Close() player.Play() - // 等待播放完成 - 确保 Play() 调用后数据都被播放 + // 等待播放完成 done := make(chan struct{}) go func() { - // 先确保播放器开始播放 for !player.IsPlaying() { time.Sleep(10 * time.Millisecond) } - // 等待播放结束(播放完所有数据) for player.IsPlaying() { time.Sleep(10 * time.Millisecond) } - // 额外等待 200ms 确保缓冲区数据完全播放 time.Sleep(200 * time.Millisecond) close(done) }() @@ -82,26 +96,39 @@ func PlayMP3(ctx context.Context, r io.ReadCloser) error { defer r.Close() // MP3 解码器信息 - zap.S().Debugf("MP3 采样率: %d Hz, 时长: %d samples", - dec.SampleRate(), dec.Length()) + sampleRate := int(dec.SampleRate()) + 
sampleCount := dec.Length() + targetRate := UniversalSampleRate + channels := 2 // MP3 通常是立体声 + duration := time.Duration(float64(sampleCount)/float64(sampleRate)*1000) * time.Millisecond - player := otoCtx.NewPlayer(dec) + zap.S().Infof("MP3 音频: %d Hz → %d Hz, 时长约: %v", sampleRate, targetRate, duration) + + // 需要重采样 + var reader io.Reader = dec + if needsResample(sampleRate, targetRate) { + zap.S().Infof("重采样: %d Hz → %d Hz", sampleRate, targetRate) + resampleReader, err := newResamplingReader(dec, sampleRate, targetRate, channels) + if err != nil { + return fmt.Errorf("创建重采样器失败: %w", err) + } + reader = resampleReader + } + + player := otoCtx.NewPlayer(reader) defer player.Close() player.Play() - // 等待播放完成 - 确保 Play() 调用后数据都被播放 + // 等待播放完成 done := make(chan struct{}) go func() { - // 先确保播放器开始播放 for !player.IsPlaying() { time.Sleep(10 * time.Millisecond) } - // 等待播放结束(播放完所有数据) for player.IsPlaying() { time.Sleep(10 * time.Millisecond) } - // 额外等待 200ms 确保缓冲区数据完全播放 time.Sleep(200 * time.Millisecond) close(done) }()
diff --git a/pkg/audio/resampler.go b/pkg/audio/resampler.go
new file mode 100644
index 0000000..006387f
--- /dev/null
+++ b/pkg/audio/resampler.go
@@ -0,0 +1,108 @@
+package audio
+
+import (
+	"io"
+
+	"github.com/zeozeozeo/gomplerate"
+)
+
+// resamplingReader wraps an io.Reader of interleaved signed 16-bit
+// little-endian PCM and yields the same audio at a different sample rate.
+type resamplingReader struct {
+	source    io.Reader
+	resampler *gomplerate.Resampler
+	frameSize int    // bytes per frame: 2 bytes per sample times the channel count
+	scratch   []byte // reusable destination for reads from source
+	buffer    []byte // source bytes not yet resampled
+	pending   []byte // resampled bytes not yet returned to the caller
+	err       error  // source error held back until pending is drained
+}
+
+// newResamplingReader returns a reader that converts the PCM stream read from
+// src from sourceRate to targetRate (both in Hz, e.g. 16000 and 44100).
+// channels is the number of interleaved channels (1 = mono, 2 = stereo).
+// An error from gomplerate.NewResampler is returned as is.
+func newResamplingReader(src io.Reader, sourceRate, targetRate, channels int) (io.Reader, error) {
+	resampler, err := gomplerate.NewResampler(channels, sourceRate, targetRate)
+	if err != nil {
+		return nil, err
+	}
+
+	// Never let the frame size drop below one sample; Read divides by it.
+	frameSize := 2 * channels
+	if frameSize < 2 {
+		frameSize = 2
+	}
+
+	return &resamplingReader{
+		source:    src,
+		resampler: resampler,
+		frameSize: frameSize,
+		scratch:   make([]byte, 4096),
+		buffer:    make([]byte, 0, 8192),
+	}, nil
+}
+
+// Read fills p with resampled PCM bytes. Every chunk read from the source is
+// resampled exactly once; output that does not fit into p is kept for later
+// calls, and a trailing partial frame is carried over to the next chunk. A
+// source error, including io.EOF, is reported once after all pending output
+// has been returned and is then cleared, so a caller that rewinds the source
+// can keep reading from this reader.
+func (r *resamplingReader) Read(p []byte) (int, error) {
+	if len(p) == 0 {
+		return 0, nil
+	}
+
+	for len(r.pending) == 0 {
+		if r.err != nil {
+			err := r.err
+			r.err = nil
+			// Drop any partial frame so a rewound source starts frame-aligned.
+			r.buffer = r.buffer[:0]
+			return 0, err
+		}
+
+		rn, readErr := r.source.Read(r.scratch)
+		r.buffer = append(r.buffer, r.scratch[:rn]...)
+		r.err = readErr
+
+		// Only whole frames go through the resampler.
+		usable := len(r.buffer) - len(r.buffer)%r.frameSize
+		if usable == 0 {
+			continue
+		}
+		resampled := r.resampler.ResampleInt16(bytesToInt16(r.buffer[:usable]))
+		r.pending = int16ToBytes(resampled)
+		r.buffer = r.buffer[:copy(r.buffer, r.buffer[usable:])]
+	}
+
+	n := copy(p, r.pending)
+	r.pending = r.pending[n:]
+	return n, nil
+}
+
+// bytesToInt16 decodes little-endian byte pairs into int16 samples.
+// A trailing odd byte is ignored.
+func bytesToInt16(b []byte) []int16 {
+	result := make([]int16, len(b)/2)
+	for i := range result {
+		result[i] = int16(b[i*2]) | int16(b[i*2+1])<<8
+	}
+	return result
+}
+
+// int16ToBytes encodes int16 samples as little-endian byte pairs.
+func int16ToBytes(i []int16) []byte {
+	result := make([]byte, len(i)*2)
+	for n, v := range i {
+		result[n*2] = byte(v)
+		result[n*2+1] = byte(v >> 8)
+	}
+	return result
+}
+
+// needsResample reports whether sourceRate differs from targetRate.
+func needsResample(sourceRate, targetRate int) bool {
+	return sourceRate != targetRate
+}