feat(audio): 添加单声道转立体声转换功能
All checks were successful
ci/woodpecker/tag/woodpecker Pipeline was successful

- 新增 monoToStereoReader 将单声道 WAV 实时转换为立体声
- PlayWav 自动检测单声道并应用转换管线
- 添加完整的单元测试覆盖转换逻辑
- 整理 import 顺序(goimports)

Co-authored-by: Qwen-Coder <qwen-coder@alibabacloud.com>
This commit is contained in:
2026-04-09 10:34:45 +08:00
parent 9825a85359
commit 6ac23c28f1
3 changed files with 160 additions and 8 deletions

View File

@@ -31,5 +31,4 @@
// 资源管理:
// - 一次性播放: 函数内部自动管理所有资源
// - 循环播放: 调用者必须调用 defer cleanup() 清理资源
//
package audio

View File

@@ -7,11 +7,51 @@ import (
"io"
"time"
"github.com/youpy/go-wav"
"github.com/hajimehoshi/go-mp3"
"github.com/youpy/go-wav"
"go.uber.org/zap"
)
// monoToStereoReader adapts a mono PCM stream into an interleaved stereo
// stream. Each mono sample is treated as two bytes (low byte first) and is
// emitted twice: once for the left channel and once for the right channel.
type monoToStereoReader struct {
	src io.Reader // upstream mono PCM source
	buf []byte    // scratch space for mono bytes; grown on demand and reused
}

// Read fills p with stereo frames (4 bytes each) converted from the mono
// source and returns the number of bytes written to p.
//
// At most len(p)/4 frames are produced per call. When p cannot hold a single
// frame (len(p) < 4) nothing is read from the source and (0, nil) is returned.
//
// Any error reported by the source, io.EOF or otherwise, is returned together
// with the frames converted from the data that accompanied it, so callers
// never lose an upstream failure. If the stream ends in the middle of a
// sample, the incomplete trailing byte is discarded.
func (m *monoToStereoReader) Read(p []byte) (int, error) {
	maxSamples := len(p) / 4
	if maxSamples == 0 {
		return 0, nil
	}

	// Two mono bytes are needed for every four stereo bytes produced.
	want := maxSamples * 2
	if cap(m.buf) < want {
		m.buf = make([]byte, want)
	}
	mono := m.buf[:want]

	n, err := m.src.Read(mono)
	if n%2 == 1 && err == nil {
		// The source split a sample across two reads. Fetch the second half
		// now; otherwise the dangling byte would be dropped and every later
		// sample would be assembled from the wrong pair of bytes. want is
		// even and n is odd, so mono[n] is always in range.
		var extra int
		extra, err = io.ReadFull(m.src, mono[n:n+1])
		n += extra
	}

	// Mono -> stereo: duplicate each complete sample into both channels.
	samples := n / 2
	for i := 0; i < samples; i++ {
		lo, hi := mono[2*i], mono[2*i+1]
		frame := p[4*i : 4*i+4]
		frame[0], frame[1] = lo, hi // left channel
		frame[2], frame[3] = lo, hi // right channel
	}
	return samples * 4, err
}
// PlayWav 播放 WAV 文件(阻塞),直到完成或 context 取消
func PlayWav(ctx context.Context, r io.ReadCloser) error {
// Read the entire file into memory since wav.NewReader needs ReadAt
@@ -33,15 +73,21 @@ func PlayWav(ctx context.Context, r io.ReadCloser) error {
duration, _ := dec.Duration()
sourceRate := int(format.SampleRate)
channels := int(format.NumChannels)
zap.S().Infof("WAV 音频: %d ch, %d Hz, 时长: %v",
format.NumChannels, sourceRate, duration)
zap.S().Infof("WAV 音频: %d ch, %d Hz, 时长: %v", channels, sourceRate, duration)
// 构建处理管线:单声道转换 → 重采样
reader := io.Reader(dec)
if channels == 1 {
zap.S().Infof("单声道转立体声: 1 ch → 2 ch")
reader = &monoToStereoReader{src: dec}
channels = DefaultChannelCount
}
// 需要重采样(使用 Sinc 高质量重采样)
var reader io.Reader = dec
if needsResampling(sourceRate) {
zap.S().Infof("Sinc 重采样: %d Hz → %d Hz", sourceRate, UniversalSampleRate)
reader = newSincResampler(dec, sourceRate, UniversalSampleRate, int(format.NumChannels))
zap.S().Infof("Sinc 重采样: %d Hz → %d Hz, %d ch", sourceRate, UniversalSampleRate, channels)
reader = newSincResampler(reader, sourceRate, UniversalSampleRate, channels)
}
otoCtx, err := initContext()

View File

@@ -1,7 +1,9 @@
package audio
import (
"bytes"
"context"
"io"
"os"
"testing"
"time"
@@ -75,3 +77,108 @@ func TestPlayContextCancellation(t *testing.T) {
t.Errorf("期望 context.Canceled 错误,得到: %v", err)
}
}
// TestMonoToStereoReader verifies that a single Read duplicates every mono
// sample into the left and right channel of the stereo output.
func TestMonoToStereoReader(t *testing.T) {
	// Input: four mono samples, two bytes each (8 bytes total).
	input := []byte{
		0x00, 0x10, // sample 1: 0x1000 = 4096
		0x00, 0x20, // sample 2: 0x2000 = 8192
		0x00, 0x30, // sample 3: 0x3000 = 12288
		0x00, 0x40, // sample 4: 0x4000 = 16384
	}
	// Expected output: each sample appears twice in a row (left, then right).
	want := []byte{
		0x00, 0x10, 0x00, 0x10, // sample 1: L=0x1000, R=0x1000
		0x00, 0x20, 0x00, 0x20, // sample 2: L=0x2000, R=0x2000
		0x00, 0x30, 0x00, 0x30, // sample 3: L=0x3000, R=0x3000
		0x00, 0x40, 0x00, 0x40, // sample 4: L=0x4000, R=0x4000
	}

	conv := &monoToStereoReader{src: bytes.NewReader(input)}

	// 8 mono bytes must expand to 16 stereo bytes.
	got := make([]byte, 16)
	count, readErr := conv.Read(got)
	switch {
	case readErr != nil:
		t.Fatalf("读取失败: %v", readErr)
	case count != 16:
		t.Fatalf("期望读取16字节实际读取%d字节", count)
	}

	if !bytes.Equal(got, want) {
		t.Errorf("立体声转换不正确\n期望: %x\n实际: %x", want, got)
	}
}
// TestMonoToStereoReaderStreaming drains the converter through a small
// destination buffer and checks the total number of stereo bytes produced.
func TestMonoToStereoReaderStreaming(t *testing.T) {
	// A larger input filled with a repeating 0..255 byte pattern.
	const monoBytes = 1000
	input := make([]byte, monoBytes)
	for idx := 0; idx < monoBytes; idx++ {
		input[idx] = byte(idx % 256)
	}

	conv := &monoToStereoReader{src: bytes.NewReader(input)}

	chunk := make([]byte, 32) // deliberately small so many reads are needed
	produced := 0
	for done := false; !done; {
		got, readErr := conv.Read(chunk)
		produced += got
		switch {
		case readErr == io.EOF:
			done = true
		case readErr != nil:
			t.Fatalf("流式读取失败: %v", readErr)
		case got == 0:
			// No data and no error would make this loop spin forever.
			t.Fatal("读取返回0字节但未EOF")
		}
	}

	// 1000 mono bytes must become 2000 stereo bytes.
	const want = 2000
	if produced != want {
		t.Fatalf("期望总共读取%d字节实际读取%d字节", want, produced)
	}
}
// TestMonoToStereoReaderPartialRead checks the byte counts returned when the
// destination buffer is not a multiple of one stereo frame (4 bytes), and
// that the read after the data is exhausted reports io.EOF.
func TestMonoToStereoReaderPartialRead(t *testing.T) {
	// Three mono samples.
	input := []byte{0x00, 0x10, 0x00, 0x20, 0x00, 0x30}
	conv := &monoToStereoReader{src: bytes.NewReader(input)}

	// readInto performs one Read into a fresh buffer of the given size.
	readInto := func(size int) (int, error) {
		return conv.Read(make([]byte, size))
	}

	// Read 1: a 6-byte buffer holds only one whole frame, so one mono sample
	// is consumed and 4 bytes are produced.
	first, err := readInto(6)
	if err != nil {
		t.Fatalf("第一次读取失败: %v", err)
	}
	if first != 4 {
		t.Fatalf("第一次读取期望4字节实际%d字节", first)
	}

	// Read 2: a 10-byte buffer holds two whole frames, which covers the two
	// remaining mono samples and yields 8 bytes.
	second, err := readInto(10)
	if err != nil {
		t.Fatalf("第二次读取失败: %v", err)
	}
	if second != 8 {
		t.Fatalf("第二次读取期望8字节实际%d字节", second)
	}

	// Read 3: nothing is left, so EOF with zero bytes is expected.
	third, err := readInto(10)
	if err != io.EOF {
		t.Fatalf("第三次读取期望EOF实际: %v", err)
	}
	if third != 0 {
		t.Fatalf("第三次读取EOF时期望0字节实际%d字节", third)
	}
}