// monoToStereoReader wraps a mono stream of 2-byte samples and exposes it as
// an interleaved stereo stream: every sample read from src is emitted twice,
// once for the left channel and once for the right, so 2 source bytes become
// 4 output bytes.
type monoToStereoReader struct {
	src io.Reader // mono source stream
	buf []byte    // scratch space for mono bytes; grown on demand and reused between Read calls
}

// Read fills p with as many whole stereo frames (4 bytes each) as one read
// from the source provides and returns the number of bytes written to p.
//
// The byte order inside a sample is left untouched; the two bytes are simply
// copied to both channels.
//
// Any error reported by the source, io.EOF or otherwise, is returned together
// with the frames that were converted before it occurred. If p is too small to
// hold a single frame (len(p) < 4), Read returns 0, nil without touching the
// source.
func (m *monoToStereoReader) Read(p []byte) (int, error) {
	frames := len(p) / 4
	if frames == 0 {
		return 0, nil
	}

	// Never pull more mono bytes than p can hold once they are doubled.
	want := frames * 2
	if cap(m.buf) < want {
		m.buf = make([]byte, want)
	}
	mono := m.buf[:want]

	n, err := m.src.Read(mono)
	if err == nil && n%2 != 0 {
		// The source stopped in the middle of a sample. Fetch the missing
		// byte now; dropping the dangling one would shift every following
		// sample by one byte. n is odd and want is even, so mono[n] exists.
		var extra int
		extra, err = io.ReadFull(m.src, mono[n:n+1])
		n += extra
	}

	// A lone trailing byte can only remain here when the source ended or
	// failed mid-sample; it cannot form a frame and is discarded.
	samples := n / 2
	for i := 0; i < samples; i++ {
		lo, hi := mono[2*i], mono[2*i+1]
		frame := p[4*i : 4*i+4]
		frame[0], frame[1] = lo, hi // left channel
		frame[2], frame[3] = lo, hi // right channel
	}

	// Hand the source error back unchanged so that failures other than
	// io.EOF are not hidden behind a successful partial read.
	return samples * 4, err
}
需要重采样(使用 Sinc 高质量重采样) - var reader io.Reader = dec if needsResampling(sourceRate) { - zap.S().Infof("Sinc 重采样: %d Hz → %d Hz", sourceRate, UniversalSampleRate) - reader = newSincResampler(dec, sourceRate, UniversalSampleRate, int(format.NumChannels)) + zap.S().Infof("Sinc 重采样: %d Hz → %d Hz, %d ch", sourceRate, UniversalSampleRate, channels) + reader = newSincResampler(reader, sourceRate, UniversalSampleRate, channels) } otoCtx, err := initContext() diff --git a/pkg/audio/play_test.go b/pkg/audio/play_test.go index e6eaf80..8986204 100644 --- a/pkg/audio/play_test.go +++ b/pkg/audio/play_test.go @@ -1,7 +1,9 @@ package audio import ( + "bytes" "context" + "io" "os" "testing" "time" @@ -75,3 +77,108 @@ func TestPlayContextCancellation(t *testing.T) { t.Errorf("期望 context.Canceled 错误,得到: %v", err) } } + +// TestMonoToStereoReader 测试单声道转立体声 +func TestMonoToStereoReader(t *testing.T) { + // 创建测试数据:4个单声道样本(8字节) + monoData := []byte{ + 0x00, 0x10, // 样本1: 0x1000 = 4096 + 0x00, 0x20, // 样本2: 0x2000 = 8192 + 0x00, 0x30, // 样本3: 0x3000 = 12288 + 0x00, 0x40, // 样本4: 0x4000 = 16384 + } + + reader := &monoToStereoReader{src: bytes.NewReader(monoData)} + output := make([]byte, 16) // 应该产生8个样本(16字节) + + n, err := reader.Read(output) + if err != nil { + t.Fatalf("读取失败: %v", err) + } + + if n != 16 { + t.Fatalf("期望读取16字节,实际读取%d字节", n) + } + + // 验证立体声输出(每个单声道样本被复制到左右声道) + expected := []byte{ + 0x00, 0x10, 0x00, 0x10, // 样本1: 左=0x1000, 右=0x1000 + 0x00, 0x20, 0x00, 0x20, // 样本2: 左=0x2000, 右=0x2000 + 0x00, 0x30, 0x00, 0x30, // 样本3: 左=0x3000, 右=0x3000 + 0x00, 0x40, 0x00, 0x40, // 样本4: 左=0x4000, 右=0x4000 + } + + if !bytes.Equal(output, expected) { + t.Errorf("立体声转换不正确\n期望: %x\n实际: %x", expected, output) + } +} + +// TestMonoToStereoReaderStreaming 测试流式读取 +func TestMonoToStereoReaderStreaming(t *testing.T) { + // 创建较大的测试数据 + monoData := make([]byte, 1000) + for i := range monoData { + monoData[i] = byte(i % 256) + } + + reader := &monoToStereoReader{src: bytes.NewReader(monoData)} + 
totalRead := 0 + buf := make([]byte, 32) // 小缓冲区 + + for { + n, err := reader.Read(buf) + totalRead += n + if err == io.EOF { + break + } + if err != nil { + t.Fatalf("流式读取失败: %v", err) + } + if n == 0 { + t.Fatal("读取返回0字节但未EOF") + } + } + + // 1000字节单声道应该转换为2000字节立体声 + expectedTotal := 2000 + if totalRead != expectedTotal { + t.Fatalf("期望总共读取%d字节,实际读取%d字节", expectedTotal, totalRead) + } +} + +// TestMonoToStereoReaderPartialRead 测试部分读取 +func TestMonoToStereoReaderPartialRead(t *testing.T) { + monoData := []byte{0x00, 0x10, 0x00, 0x20, 0x00, 0x30} // 3个单声道样本 + reader := &monoToStereoReader{src: bytes.NewReader(monoData)} + + // 第一次读取:请求6字节输出(只能读取1个单声道样本=4字节输出) + buf1 := make([]byte, 6) + n1, err := reader.Read(buf1) + if err != nil { + t.Fatalf("第一次读取失败: %v", err) + } + if n1 != 4 { + t.Fatalf("第一次读取期望4字节,实际%d字节", n1) + } + + // 第二次读取:请求10字节输出(读取剩余2个单声道样本=8字节输出) + buf2 := make([]byte, 10) + n2, err := reader.Read(buf2) + if err != nil { + t.Fatalf("第二次读取失败: %v", err) + } + // 剩余2个单声道样本转换为8字节立体声 + if n2 != 8 { + t.Fatalf("第二次读取期望8字节,实际%d字节", n2) + } + + // 第三次读取:应该返回EOF + buf3 := make([]byte, 10) + n3, err := reader.Read(buf3) + if err != io.EOF { + t.Fatalf("第三次读取期望EOF,实际: %v", err) + } + if n3 != 0 { + t.Fatalf("第三次读取EOF时期望0字节,实际%d字节", n3) + } +}