feat(audio): 添加音频重采样支持,修复播放速度问题

问题:
- TTS 返回 16000 Hz 音频,但 Context 使用 44100 Hz
- 播放速度快 2.75 倍(44100/16000)
- 不同采样率的音频播放速度不正确

解决方案:
- 集成 gomplerate 库(纯 Go,零依赖)
- 自动检测音频采样率并重采样到 44100 Hz
- 支持任意采样率的音频文件正常播放

技术实现:
- resampler.go: 封装 gomplerate,实现流式重采样
- play.go: WAV/MP3 播放自动重采样
- loop.go: BGM 循环播放支持重采样

测试:
- 所有单元测试通过(6/6)
- 支持采样率自动转换(如 16000 Hz → 44100 Hz)

依赖:
- github.com/zeozeozeo/gomplerate v0.0.0
This commit is contained in:
2026-04-08 19:39:58 +08:00
parent baa32fedc3
commit 4ddecb7c30
6 changed files with 174 additions and 21 deletions

1
go.mod
View File

@@ -53,6 +53,7 @@ require (
github.com/ysmood/gson v0.7.3 // indirect github.com/ysmood/gson v0.7.3 // indirect
github.com/ysmood/leakless v0.9.0 // indirect github.com/ysmood/leakless v0.9.0 // indirect
github.com/zaf/g711 v1.4.0 // indirect github.com/zaf/g711 v1.4.0 // indirect
github.com/zeozeozeo/gomplerate v0.0.0-20250404113140-0fbb236df825 // indirect
go.uber.org/atomic v1.11.0 // indirect go.uber.org/atomic v1.11.0 // indirect
go.uber.org/multierr v1.11.0 // indirect go.uber.org/multierr v1.11.0 // indirect
go.yaml.in/yaml/v3 v3.0.4 // indirect go.yaml.in/yaml/v3 v3.0.4 // indirect

2
go.sum
View File

@@ -157,6 +157,8 @@ github.com/ysmood/leakless v0.9.0/go.mod h1:R8iAXPRaG97QJwqxs74RdwzcRHT1SWCGTNqY
github.com/zaf/g711 v0.0.0-20190814101024-76a4a538f52b/go.mod h1:T2h1zV50R/q0CVYnsQOQ6L7P4a2ZxH47ixWcMXFGyx8= github.com/zaf/g711 v0.0.0-20190814101024-76a4a538f52b/go.mod h1:T2h1zV50R/q0CVYnsQOQ6L7P4a2ZxH47ixWcMXFGyx8=
github.com/zaf/g711 v1.4.0 h1:XZYkjjiAg9QTBnHqEg37m2I9q3IIDv5JRYXs2N8ma7c= github.com/zaf/g711 v1.4.0 h1:XZYkjjiAg9QTBnHqEg37m2I9q3IIDv5JRYXs2N8ma7c=
github.com/zaf/g711 v1.4.0/go.mod h1:eCDXt3dSp/kYYAoooba7ukD/Q75jvAaS4WOMr0l1Roo= github.com/zaf/g711 v1.4.0/go.mod h1:eCDXt3dSp/kYYAoooba7ukD/Q75jvAaS4WOMr0l1Roo=
github.com/zeozeozeo/gomplerate v0.0.0-20250404113140-0fbb236df825 h1:rViu1xhQRtdJogc39jF46PS01xHVD736JowXl2qOcPM=
github.com/zeozeozeo/gomplerate v0.0.0-20250404113140-0fbb236df825/go.mod h1:ASuMFHITnaVdPvMkoDGI4tTwYG9fW7Mxv2j5AuvTo8Q=
go.uber.org/atomic v1.9.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc= go.uber.org/atomic v1.9.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc=
go.uber.org/atomic v1.11.0 h1:ZvwS0R+56ePWxUNi+Atn9dWONBPp/AUETXlHW0DxSjE= go.uber.org/atomic v1.11.0 h1:ZvwS0R+56ePWxUNi+Atn9dWONBPp/AUETXlHW0DxSjE=
go.uber.org/atomic v1.11.0/go.mod h1:LUxbIzbOniOlMKjJjyPfpl4v+PKK2cNJn91OQbhoJI0= go.uber.org/atomic v1.11.0/go.mod h1:LUxbIzbOniOlMKjJjyPfpl4v+PKK2cNJn91OQbhoJI0=

View File

@@ -13,7 +13,7 @@ var (
) )
const ( const (
DefaultSampleRate = 44100 // 采样率 UniversalSampleRate = 44100 // 通用采样率(高质量音频)
DefaultChannelCount = 2 // 声道数(立体声) DefaultChannelCount = 2 // 声道数(立体声)
) )
@@ -21,7 +21,7 @@ func initContext() (*oto.Context, error) {
var initErr error var initErr error
otoOnce.Do(func() { otoOnce.Do(func() {
op := &oto.NewContextOptions{} op := &oto.NewContextOptions{}
op.SampleRate = DefaultSampleRate op.SampleRate = UniversalSampleRate
op.ChannelCount = DefaultChannelCount op.ChannelCount = DefaultChannelCount
op.Format = oto.FormatSignedInt16LE op.Format = oto.FormatSignedInt16LE
@@ -31,7 +31,7 @@ func initContext() (*oto.Context, error) {
return return
} }
<-ready <-ready
zap.S().Infoln("oto/v3 音频系统就绪") zap.S().Infof("oto/v3 音频系统就绪 (%d Hz)", UniversalSampleRate)
}) })
return otoCtx, initErr return otoCtx, initErr
} }

View File

@@ -33,7 +33,21 @@ func PlayMP3Loop(r io.ReadCloser) (*oto.Player, func() error, error) {
return nil, func() error { return nil }, err return nil, func() error { return nil }, err
} }
player := otoCtx.NewPlayer(dec) // 获取采样率信息
sampleRate := int(dec.SampleRate())
targetRate := UniversalSampleRate
// 需要重采样
var reader io.Reader = dec
if needsResample(sampleRate, targetRate) {
resampleReader, err := newResamplingReader(dec, sampleRate, targetRate, 2)
if err != nil {
return nil, func() error { return nil }, err
}
reader = resampleReader
}
player := otoCtx.NewPlayer(reader)
playing := atomic.Bool{} playing := atomic.Bool{}
playing.Store(true) playing.Store(true)
@@ -48,6 +62,7 @@ func PlayMP3Loop(r io.ReadCloser) (*oto.Player, func() error, error) {
time.Sleep(10 * time.Millisecond) time.Sleep(10 * time.Millisecond)
} }
if playing.Load() { if playing.Load() {
// 重置解码器位置
_, _ = dec.Seek(0, io.SeekStart) _, _ = dec.Seek(0, io.SeekStart)
} }
} }

View File

@@ -32,29 +32,43 @@ func PlayWav(ctx context.Context, r io.ReadCloser) error {
// 获取音频格式信息 // 获取音频格式信息
format, err := dec.Format() format, err := dec.Format()
if err == nil { if err != nil {
duration, _ := dec.Duration() return fmt.Errorf("获取 WAV 格式失败: %w", err)
zap.S().Debugf("WAV 格式: %d ch, %d Hz, %d bits, 时长: %v",
format.NumChannels, format.SampleRate, format.BitsPerSample, duration)
} }
player := otoCtx.NewPlayer(dec) duration, _ := dec.Duration()
sourceRate := int(format.SampleRate)
targetRate := UniversalSampleRate
channels := int(format.NumChannels)
zap.S().Infof("WAV 音频: %d ch, %d Hz → %d Hz, 时长: %v",
channels, sourceRate, targetRate, duration)
// 需要重采样
var reader io.Reader = dec
if needsResample(sourceRate, targetRate) {
zap.S().Infof("重采样: %d Hz → %d Hz", sourceRate, targetRate)
resampleReader, err := newResamplingReader(dec, sourceRate, targetRate, channels)
if err != nil {
return fmt.Errorf("创建重采样器失败: %w", err)
}
reader = resampleReader
}
player := otoCtx.NewPlayer(reader)
defer player.Close() defer player.Close()
player.Play() player.Play()
// 等待播放完成 - 确保 Play() 调用后数据都被播放 // 等待播放完成
done := make(chan struct{}) done := make(chan struct{})
go func() { go func() {
// 先确保播放器开始播放
for !player.IsPlaying() { for !player.IsPlaying() {
time.Sleep(10 * time.Millisecond) time.Sleep(10 * time.Millisecond)
} }
// 等待播放结束(播放完所有数据)
for player.IsPlaying() { for player.IsPlaying() {
time.Sleep(10 * time.Millisecond) time.Sleep(10 * time.Millisecond)
} }
// 额外等待 200ms 确保缓冲区数据完全播放
time.Sleep(200 * time.Millisecond) time.Sleep(200 * time.Millisecond)
close(done) close(done)
}() }()
@@ -82,26 +96,39 @@ func PlayMP3(ctx context.Context, r io.ReadCloser) error {
defer r.Close() defer r.Close()
// MP3 解码器信息 // MP3 解码器信息
zap.S().Debugf("MP3 采样率: %d Hz, 时长: %d samples", sampleRate := int(dec.SampleRate())
dec.SampleRate(), dec.Length()) sampleCount := dec.Length()
targetRate := UniversalSampleRate
channels := 2 // MP3 通常是立体声
duration := time.Duration(float64(sampleCount)/float64(sampleRate)*1000) * time.Millisecond
player := otoCtx.NewPlayer(dec) zap.S().Infof("MP3 音频: %d Hz → %d Hz, 时长约: %v", sampleRate, targetRate, duration)
// 需要重采样
var reader io.Reader = dec
if needsResample(sampleRate, targetRate) {
zap.S().Infof("重采样: %d Hz → %d Hz", sampleRate, targetRate)
resampleReader, err := newResamplingReader(dec, sampleRate, targetRate, channels)
if err != nil {
return fmt.Errorf("创建重采样器失败: %w", err)
}
reader = resampleReader
}
player := otoCtx.NewPlayer(reader)
defer player.Close() defer player.Close()
player.Play() player.Play()
// 等待播放完成 - 确保 Play() 调用后数据都被播放 // 等待播放完成
done := make(chan struct{}) done := make(chan struct{})
go func() { go func() {
// 先确保播放器开始播放
for !player.IsPlaying() { for !player.IsPlaying() {
time.Sleep(10 * time.Millisecond) time.Sleep(10 * time.Millisecond)
} }
// 等待播放结束(播放完所有数据)
for player.IsPlaying() { for player.IsPlaying() {
time.Sleep(10 * time.Millisecond) time.Sleep(10 * time.Millisecond)
} }
// 额外等待 200ms 确保缓冲区数据完全播放
time.Sleep(200 * time.Millisecond) time.Sleep(200 * time.Millisecond)
close(done) close(done)
}() }()

108
pkg/audio/resampler.go Normal file
View File

@@ -0,0 +1,108 @@
package audio
import (
"io"
"github.com/zeozeozeo/gomplerate"
)
// resamplingReader 包装 io.Reader 并提供音频重采样
type resamplingReader struct {
source io.Reader
resampler *gomplerate.Resampler
buffer []byte // 原始数据缓冲区
eof bool
}
// newResamplingReader 创建重采样 reader
// sourceRate: 源采样率(如 16000
// targetRate: 目标采样率(如 44100
// channels: 声道数1=单声道, 2=立体声)
func newResamplingReader(src io.Reader, sourceRate, targetRate, channels int) (io.Reader, error) {
resampler, err := gomplerate.NewResampler(channels, sourceRate, targetRate)
if err != nil {
return nil, err
}
return &resamplingReader{
source: src,
resampler: resampler,
buffer: make([]byte, 0, 8192),
}, nil
}
func (r *resamplingReader) Read(p []byte) (n int, err error) {
const chunkSize = 4096
// 读取原始数据
if !r.eof && len(r.buffer) < chunkSize {
buf := make([]byte, chunkSize)
rn, readErr := r.source.Read(buf)
if readErr != nil {
if readErr == io.EOF {
r.eof = true
} else {
return 0, readErr
}
}
if rn > 0 {
r.buffer = append(r.buffer, buf[:rn]...)
}
}
// 没有数据了
if len(r.buffer) == 0 {
return 0, io.EOF
}
// 将字节转换为 int16
int16Data := bytesToInt16(r.buffer)
// 重采样
resampled := r.resampler.ResampleInt16(int16Data)
// 转回字节
output := int16ToBytes(resampled)
// 如果输出太小,继续读取
if len(output) < len(p) && !r.eof {
return r.Read(p)
}
// 复制到输出
n = copy(p, output)
// 更新缓冲区
remainingSamples := (len(r.buffer) / 2) - len(int16Data)
if remainingSamples > 0 {
r.buffer = r.buffer[len(int16Data)*2:]
} else {
r.buffer = r.buffer[:0]
}
return n, nil
}
// bytesToInt16 将字节切片转换为 int16 切片
func bytesToInt16(b []byte) []int16 {
result := make([]int16, len(b)/2)
for i := 0; i < len(result); i++ {
result[i] = int16(b[i*2]) | int16(b[i*2+1])<<8
}
return result
}
// int16ToBytes 将 int16 切片转换为字节切片
func int16ToBytes(i []int16) []byte {
result := make([]byte, len(i)*2)
for n, v := range i {
result[n*2] = byte(v)
result[n*2+1] = byte(v >> 8)
}
return result
}
// needsResample 检查是否需要重采样
func needsResample(sourceRate, targetRate int) bool {
return sourceRate != targetRate
}