feat(audio): 添加音频重采样支持,修复播放速度问题
问题: - TTS 返回 16000 Hz 音频,但 Context 使用 44100 Hz - 播放速度约为正常的 2.76 倍(44100/16000 ≈ 2.756) - 不同采样率的音频播放速度不正确 解决方案: - 集成 gomplerate 库(纯 Go,零依赖) - 自动检测音频采样率并重采样到 44100 Hz - 支持任意采样率的音频文件正常播放 技术实现: - resampler.go: 封装 gomplerate,实现流式重采样 - play.go: WAV/MP3 播放自动重采样 - loop.go: BGM 循环播放支持重采样 测试: - 所有单元测试通过(6/6) - 支持采样率自动转换(如 16000 Hz → 44100 Hz) 依赖: - github.com/zeozeozeo/gomplerate v0.0.0-20250404113140-0fbb236df825
This commit is contained in:
1
go.mod
1
go.mod
@@ -53,6 +53,7 @@ require (
|
|||||||
github.com/ysmood/gson v0.7.3 // indirect
|
github.com/ysmood/gson v0.7.3 // indirect
|
||||||
github.com/ysmood/leakless v0.9.0 // indirect
|
github.com/ysmood/leakless v0.9.0 // indirect
|
||||||
github.com/zaf/g711 v1.4.0 // indirect
|
github.com/zaf/g711 v1.4.0 // indirect
|
||||||
|
github.com/zeozeozeo/gomplerate v0.0.0-20250404113140-0fbb236df825 // indirect
|
||||||
go.uber.org/atomic v1.11.0 // indirect
|
go.uber.org/atomic v1.11.0 // indirect
|
||||||
go.uber.org/multierr v1.11.0 // indirect
|
go.uber.org/multierr v1.11.0 // indirect
|
||||||
go.yaml.in/yaml/v3 v3.0.4 // indirect
|
go.yaml.in/yaml/v3 v3.0.4 // indirect
|
||||||
|
|||||||
2
go.sum
2
go.sum
@@ -157,6 +157,8 @@ github.com/ysmood/leakless v0.9.0/go.mod h1:R8iAXPRaG97QJwqxs74RdwzcRHT1SWCGTNqY
|
|||||||
github.com/zaf/g711 v0.0.0-20190814101024-76a4a538f52b/go.mod h1:T2h1zV50R/q0CVYnsQOQ6L7P4a2ZxH47ixWcMXFGyx8=
|
github.com/zaf/g711 v0.0.0-20190814101024-76a4a538f52b/go.mod h1:T2h1zV50R/q0CVYnsQOQ6L7P4a2ZxH47ixWcMXFGyx8=
|
||||||
github.com/zaf/g711 v1.4.0 h1:XZYkjjiAg9QTBnHqEg37m2I9q3IIDv5JRYXs2N8ma7c=
|
github.com/zaf/g711 v1.4.0 h1:XZYkjjiAg9QTBnHqEg37m2I9q3IIDv5JRYXs2N8ma7c=
|
||||||
github.com/zaf/g711 v1.4.0/go.mod h1:eCDXt3dSp/kYYAoooba7ukD/Q75jvAaS4WOMr0l1Roo=
|
github.com/zaf/g711 v1.4.0/go.mod h1:eCDXt3dSp/kYYAoooba7ukD/Q75jvAaS4WOMr0l1Roo=
|
||||||
|
github.com/zeozeozeo/gomplerate v0.0.0-20250404113140-0fbb236df825 h1:rViu1xhQRtdJogc39jF46PS01xHVD736JowXl2qOcPM=
|
||||||
|
github.com/zeozeozeo/gomplerate v0.0.0-20250404113140-0fbb236df825/go.mod h1:ASuMFHITnaVdPvMkoDGI4tTwYG9fW7Mxv2j5AuvTo8Q=
|
||||||
go.uber.org/atomic v1.9.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc=
|
go.uber.org/atomic v1.9.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc=
|
||||||
go.uber.org/atomic v1.11.0 h1:ZvwS0R+56ePWxUNi+Atn9dWONBPp/AUETXlHW0DxSjE=
|
go.uber.org/atomic v1.11.0 h1:ZvwS0R+56ePWxUNi+Atn9dWONBPp/AUETXlHW0DxSjE=
|
||||||
go.uber.org/atomic v1.11.0/go.mod h1:LUxbIzbOniOlMKjJjyPfpl4v+PKK2cNJn91OQbhoJI0=
|
go.uber.org/atomic v1.11.0/go.mod h1:LUxbIzbOniOlMKjJjyPfpl4v+PKK2cNJn91OQbhoJI0=
|
||||||
|
|||||||
@@ -13,7 +13,7 @@ var (
|
|||||||
)
|
)
|
||||||
|
|
||||||
const (
|
const (
|
||||||
DefaultSampleRate = 44100 // 采样率
|
UniversalSampleRate = 44100 // 通用采样率(高质量音频)
|
||||||
DefaultChannelCount = 2 // 声道数(立体声)
|
DefaultChannelCount = 2 // 声道数(立体声)
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -21,7 +21,7 @@ func initContext() (*oto.Context, error) {
|
|||||||
var initErr error
|
var initErr error
|
||||||
otoOnce.Do(func() {
|
otoOnce.Do(func() {
|
||||||
op := &oto.NewContextOptions{}
|
op := &oto.NewContextOptions{}
|
||||||
op.SampleRate = DefaultSampleRate
|
op.SampleRate = UniversalSampleRate
|
||||||
op.ChannelCount = DefaultChannelCount
|
op.ChannelCount = DefaultChannelCount
|
||||||
op.Format = oto.FormatSignedInt16LE
|
op.Format = oto.FormatSignedInt16LE
|
||||||
|
|
||||||
@@ -31,7 +31,7 @@ func initContext() (*oto.Context, error) {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
<-ready
|
<-ready
|
||||||
zap.S().Infoln("oto/v3 音频系统就绪")
|
zap.S().Infof("oto/v3 音频系统就绪 (%d Hz)", UniversalSampleRate)
|
||||||
})
|
})
|
||||||
return otoCtx, initErr
|
return otoCtx, initErr
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -33,7 +33,21 @@ func PlayMP3Loop(r io.ReadCloser) (*oto.Player, func() error, error) {
|
|||||||
return nil, func() error { return nil }, err
|
return nil, func() error { return nil }, err
|
||||||
}
|
}
|
||||||
|
|
||||||
player := otoCtx.NewPlayer(dec)
|
// 获取采样率信息
|
||||||
|
sampleRate := int(dec.SampleRate())
|
||||||
|
targetRate := UniversalSampleRate
|
||||||
|
|
||||||
|
// 需要重采样
|
||||||
|
var reader io.Reader = dec
|
||||||
|
if needsResample(sampleRate, targetRate) {
|
||||||
|
resampleReader, err := newResamplingReader(dec, sampleRate, targetRate, 2)
|
||||||
|
if err != nil {
|
||||||
|
return nil, func() error { return nil }, err
|
||||||
|
}
|
||||||
|
reader = resampleReader
|
||||||
|
}
|
||||||
|
|
||||||
|
player := otoCtx.NewPlayer(reader)
|
||||||
|
|
||||||
playing := atomic.Bool{}
|
playing := atomic.Bool{}
|
||||||
playing.Store(true)
|
playing.Store(true)
|
||||||
@@ -48,6 +62,7 @@ func PlayMP3Loop(r io.ReadCloser) (*oto.Player, func() error, error) {
|
|||||||
time.Sleep(10 * time.Millisecond)
|
time.Sleep(10 * time.Millisecond)
|
||||||
}
|
}
|
||||||
if playing.Load() {
|
if playing.Load() {
|
||||||
|
// 重置解码器位置
|
||||||
_, _ = dec.Seek(0, io.SeekStart)
|
_, _ = dec.Seek(0, io.SeekStart)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -32,29 +32,43 @@ func PlayWav(ctx context.Context, r io.ReadCloser) error {
|
|||||||
|
|
||||||
// 获取音频格式信息
|
// 获取音频格式信息
|
||||||
format, err := dec.Format()
|
format, err := dec.Format()
|
||||||
if err == nil {
|
if err != nil {
|
||||||
duration, _ := dec.Duration()
|
return fmt.Errorf("获取 WAV 格式失败: %w", err)
|
||||||
zap.S().Debugf("WAV 格式: %d ch, %d Hz, %d bits, 时长: %v",
|
|
||||||
format.NumChannels, format.SampleRate, format.BitsPerSample, duration)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
player := otoCtx.NewPlayer(dec)
|
duration, _ := dec.Duration()
|
||||||
|
sourceRate := int(format.SampleRate)
|
||||||
|
targetRate := UniversalSampleRate
|
||||||
|
channels := int(format.NumChannels)
|
||||||
|
|
||||||
|
zap.S().Infof("WAV 音频: %d ch, %d Hz → %d Hz, 时长: %v",
|
||||||
|
channels, sourceRate, targetRate, duration)
|
||||||
|
|
||||||
|
// 需要重采样
|
||||||
|
var reader io.Reader = dec
|
||||||
|
if needsResample(sourceRate, targetRate) {
|
||||||
|
zap.S().Infof("重采样: %d Hz → %d Hz", sourceRate, targetRate)
|
||||||
|
resampleReader, err := newResamplingReader(dec, sourceRate, targetRate, channels)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("创建重采样器失败: %w", err)
|
||||||
|
}
|
||||||
|
reader = resampleReader
|
||||||
|
}
|
||||||
|
|
||||||
|
player := otoCtx.NewPlayer(reader)
|
||||||
defer player.Close()
|
defer player.Close()
|
||||||
|
|
||||||
player.Play()
|
player.Play()
|
||||||
|
|
||||||
// 等待播放完成 - 确保 Play() 调用后数据都被播放
|
// 等待播放完成
|
||||||
done := make(chan struct{})
|
done := make(chan struct{})
|
||||||
go func() {
|
go func() {
|
||||||
// 先确保播放器开始播放
|
|
||||||
for !player.IsPlaying() {
|
for !player.IsPlaying() {
|
||||||
time.Sleep(10 * time.Millisecond)
|
time.Sleep(10 * time.Millisecond)
|
||||||
}
|
}
|
||||||
// 等待播放结束(播放完所有数据)
|
|
||||||
for player.IsPlaying() {
|
for player.IsPlaying() {
|
||||||
time.Sleep(10 * time.Millisecond)
|
time.Sleep(10 * time.Millisecond)
|
||||||
}
|
}
|
||||||
// 额外等待 200ms 确保缓冲区数据完全播放
|
|
||||||
time.Sleep(200 * time.Millisecond)
|
time.Sleep(200 * time.Millisecond)
|
||||||
close(done)
|
close(done)
|
||||||
}()
|
}()
|
||||||
@@ -82,26 +96,39 @@ func PlayMP3(ctx context.Context, r io.ReadCloser) error {
|
|||||||
defer r.Close()
|
defer r.Close()
|
||||||
|
|
||||||
// MP3 解码器信息
|
// MP3 解码器信息
|
||||||
zap.S().Debugf("MP3 采样率: %d Hz, 时长: %d samples",
|
sampleRate := int(dec.SampleRate())
|
||||||
dec.SampleRate(), dec.Length())
|
sampleCount := dec.Length()
|
||||||
|
targetRate := UniversalSampleRate
|
||||||
|
channels := 2 // MP3 通常是立体声
|
||||||
|
duration := time.Duration(float64(sampleCount)/float64(sampleRate)*1000) * time.Millisecond
|
||||||
|
|
||||||
player := otoCtx.NewPlayer(dec)
|
zap.S().Infof("MP3 音频: %d Hz → %d Hz, 时长约: %v", sampleRate, targetRate, duration)
|
||||||
|
|
||||||
|
// 需要重采样
|
||||||
|
var reader io.Reader = dec
|
||||||
|
if needsResample(sampleRate, targetRate) {
|
||||||
|
zap.S().Infof("重采样: %d Hz → %d Hz", sampleRate, targetRate)
|
||||||
|
resampleReader, err := newResamplingReader(dec, sampleRate, targetRate, channels)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("创建重采样器失败: %w", err)
|
||||||
|
}
|
||||||
|
reader = resampleReader
|
||||||
|
}
|
||||||
|
|
||||||
|
player := otoCtx.NewPlayer(reader)
|
||||||
defer player.Close()
|
defer player.Close()
|
||||||
|
|
||||||
player.Play()
|
player.Play()
|
||||||
|
|
||||||
// 等待播放完成 - 确保 Play() 调用后数据都被播放
|
// 等待播放完成
|
||||||
done := make(chan struct{})
|
done := make(chan struct{})
|
||||||
go func() {
|
go func() {
|
||||||
// 先确保播放器开始播放
|
|
||||||
for !player.IsPlaying() {
|
for !player.IsPlaying() {
|
||||||
time.Sleep(10 * time.Millisecond)
|
time.Sleep(10 * time.Millisecond)
|
||||||
}
|
}
|
||||||
// 等待播放结束(播放完所有数据)
|
|
||||||
for player.IsPlaying() {
|
for player.IsPlaying() {
|
||||||
time.Sleep(10 * time.Millisecond)
|
time.Sleep(10 * time.Millisecond)
|
||||||
}
|
}
|
||||||
// 额外等待 200ms 确保缓冲区数据完全播放
|
|
||||||
time.Sleep(200 * time.Millisecond)
|
time.Sleep(200 * time.Millisecond)
|
||||||
close(done)
|
close(done)
|
||||||
}()
|
}()
|
||||||
|
|||||||
108
pkg/audio/resampler.go
Normal file
108
pkg/audio/resampler.go
Normal file
@@ -0,0 +1,108 @@
|
|||||||
|
package audio
|
||||||
|
|
||||||
|
import (
|
||||||
|
"io"
|
||||||
|
|
||||||
|
"github.com/zeozeozeo/gomplerate"
|
||||||
|
)
|
||||||
|
|
||||||
|
// resamplingReader 包装 io.Reader 并提供音频重采样
|
||||||
|
type resamplingReader struct {
|
||||||
|
source io.Reader
|
||||||
|
resampler *gomplerate.Resampler
|
||||||
|
buffer []byte // 原始数据缓冲区
|
||||||
|
eof bool
|
||||||
|
}
|
||||||
|
|
||||||
|
// newResamplingReader 创建重采样 reader
|
||||||
|
// sourceRate: 源采样率(如 16000)
|
||||||
|
// targetRate: 目标采样率(如 44100)
|
||||||
|
// channels: 声道数(1=单声道, 2=立体声)
|
||||||
|
func newResamplingReader(src io.Reader, sourceRate, targetRate, channels int) (io.Reader, error) {
|
||||||
|
resampler, err := gomplerate.NewResampler(channels, sourceRate, targetRate)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
return &resamplingReader{
|
||||||
|
source: src,
|
||||||
|
resampler: resampler,
|
||||||
|
buffer: make([]byte, 0, 8192),
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r *resamplingReader) Read(p []byte) (n int, err error) {
|
||||||
|
const chunkSize = 4096
|
||||||
|
|
||||||
|
// 读取原始数据
|
||||||
|
if !r.eof && len(r.buffer) < chunkSize {
|
||||||
|
buf := make([]byte, chunkSize)
|
||||||
|
rn, readErr := r.source.Read(buf)
|
||||||
|
if readErr != nil {
|
||||||
|
if readErr == io.EOF {
|
||||||
|
r.eof = true
|
||||||
|
} else {
|
||||||
|
return 0, readErr
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if rn > 0 {
|
||||||
|
r.buffer = append(r.buffer, buf[:rn]...)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// 没有数据了
|
||||||
|
if len(r.buffer) == 0 {
|
||||||
|
return 0, io.EOF
|
||||||
|
}
|
||||||
|
|
||||||
|
// 将字节转换为 int16
|
||||||
|
int16Data := bytesToInt16(r.buffer)
|
||||||
|
|
||||||
|
// 重采样
|
||||||
|
resampled := r.resampler.ResampleInt16(int16Data)
|
||||||
|
|
||||||
|
// 转回字节
|
||||||
|
output := int16ToBytes(resampled)
|
||||||
|
|
||||||
|
// 如果输出太小,继续读取
|
||||||
|
if len(output) < len(p) && !r.eof {
|
||||||
|
return r.Read(p)
|
||||||
|
}
|
||||||
|
|
||||||
|
// 复制到输出
|
||||||
|
n = copy(p, output)
|
||||||
|
|
||||||
|
// 更新缓冲区
|
||||||
|
remainingSamples := (len(r.buffer) / 2) - len(int16Data)
|
||||||
|
if remainingSamples > 0 {
|
||||||
|
r.buffer = r.buffer[len(int16Data)*2:]
|
||||||
|
} else {
|
||||||
|
r.buffer = r.buffer[:0]
|
||||||
|
}
|
||||||
|
|
||||||
|
return n, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// bytesToInt16 decodes b as consecutive little-endian signed 16-bit samples.
// A trailing odd byte, if present, is ignored.
func bytesToInt16(b []byte) []int16 {
	samples := make([]int16, 0, len(b)/2)
	for off := 0; off+1 < len(b); off += 2 {
		lo, hi := uint16(b[off]), uint16(b[off+1])
		samples = append(samples, int16(hi<<8|lo))
	}
	return samples
}
|
||||||
|
|
||||||
|
// int16ToBytes encodes each sample in i as two bytes, low byte first
// (little-endian).
func int16ToBytes(i []int16) []byte {
	encoded := make([]byte, 0, len(i)*2)
	for _, sample := range i {
		bits := uint16(sample)
		encoded = append(encoded, byte(bits), byte(bits>>8))
	}
	return encoded
}
|
||||||
|
|
||||||
|
// needsResample reports whether audio at sourceRate must be converted before
// it can be played at targetRate, i.e. whether the two rates differ.
func needsResample(sourceRate, targetRate int) bool {
	if sourceRate == targetRate {
		return false
	}
	return true
}
|
||||||
Reference in New Issue
Block a user