diff --git a/pkg/audio/doc.go b/pkg/audio/doc.go
index abd3b42..4503353 100644
--- a/pkg/audio/doc.go
+++ b/pkg/audio/doc.go
@@ -31,5 +31,4 @@
 // 资源管理:
 //   - 一次性播放: 函数内部自动管理所有资源
 //   - 循环播放: 调用者必须调用 defer cleanup() 清理资源
-//
 package audio
diff --git a/pkg/audio/play.go b/pkg/audio/play.go
index 5c7e9b7..af70996 100644
--- a/pkg/audio/play.go
+++ b/pkg/audio/play.go
@@ -7,11 +7,51 @@ import (
 	"io"
 	"time"
 
-	"github.com/youpy/go-wav"
 	"github.com/hajimehoshi/go-mp3"
+	"github.com/youpy/go-wav"
 	"go.uber.org/zap"
 )
 
+// monoToStereoReader duplicates each 2-byte mono sample read from src into a
+// left/right pair. Samples are always treated as exactly two bytes wide.
+type monoToStereoReader struct {
+	src io.Reader // mono source stream
+	buf []byte    // scratch space for mono bytes, grown on demand
+}
+
+// Read fills p with whole 4-byte stereo frames and returns the byte count.
+// A non-empty p shorter than one frame yields io.ErrShortBuffer instead of
+// (0, nil); every error from src, not only io.EOF, is passed to the caller.
+func (m *monoToStereoReader) Read(p []byte) (int, error) {
+	if len(p) == 0 {
+		return 0, nil
+	}
+	want := len(p) / 4 * 2 // mono bytes that fit into p as stereo frames
+	if want == 0 {
+		return 0, io.ErrShortBuffer
+	}
+	if cap(m.buf) < want {
+		m.buf = make([]byte, want)
+	}
+	n, err := m.src.Read(m.buf[:want])
+	if n%2 == 1 && err == nil {
+		// src split a sample across reads: fetch its second byte now so no
+		// byte is dropped and later samples stay aligned (n < want, so it fits).
+		var k int
+		k, err = io.ReadFull(m.src, m.buf[n:n+1])
+		n += k
+	}
+	// Copy each complete sample to both channels. A lone trailing byte that
+	// arrives together with an error cannot form a sample and is discarded.
+	frames := n / 2
+	for i := range frames {
+		sample := m.buf[2*i : 2*i+2]
+		copy(p[4*i:], sample)   // left channel
+		copy(p[4*i+2:], sample) // right channel
+	}
+	return frames * 4, err
+}
+
 // PlayWav 播放 WAV 文件(阻塞),直到完成或 context 取消
 func PlayWav(ctx context.Context, r io.ReadCloser) error {
 	// Read the entire file into memory since wav.NewReader needs ReadAt
@@ -33,15 +73,21 @@ func PlayWav(ctx context.Context, r io.ReadCloser) error {
 
 	duration, _ := dec.Duration()
 	sourceRate := int(format.SampleRate)
+	channels := int(format.NumChannels)
 
-	zap.S().Infof("WAV 音频: %d ch, %d Hz, 时长: %v",
-		format.NumChannels, sourceRate, duration)
+	zap.S().Infof("WAV 音频: %d ch, %d Hz, 时长: %v", channels, sourceRate, duration)
+
+	// 构建处理管线：单声道转换 → 重采样
+	reader := io.Reader(dec)
+	if channels == 1 {
+		zap.S().Infof("单声道转立体声: 1 ch → 2 ch")
+		reader = &monoToStereoReader{src: dec}
+		channels = DefaultChannelCount
+	}
 
-	// 需要重采样（使用 Sinc 高质量重采样）
-	var reader io.Reader = dec
 	if needsResampling(sourceRate) {
-		zap.S().Infof("Sinc 重采样: %d Hz → %d Hz", sourceRate, UniversalSampleRate)
-		reader = newSincResampler(dec, sourceRate, UniversalSampleRate, int(format.NumChannels))
+		zap.S().Infof("Sinc 重采样: %d Hz → %d Hz, %d ch", sourceRate, UniversalSampleRate, channels)
+		reader = newSincResampler(reader, sourceRate, UniversalSampleRate, channels)
 	}
 
 	otoCtx, err := initContext()
diff --git a/pkg/audio/play_test.go b/pkg/audio/play_test.go
index e6eaf80..8986204 100644
--- a/pkg/audio/play_test.go
+++ b/pkg/audio/play_test.go
@@ -1,7 +1,9 @@
 package audio
 
 import (
+	"bytes"
 	"context"
+	"io"
 	"os"
 	"testing"
 	"time"
@@ -75,3 +77,108 @@ func TestPlayContextCancellation(t *testing.T) {
 		t.Errorf("期望 context.Canceled 错误,得到: %v", err)
 	}
 }
+
+// TestMonoToStereoReader checks that a single Read call duplicates every mono
+// sample into a left/right pair.
+func TestMonoToStereoReader(t *testing.T) {
+	// Four mono samples, two bytes each, low byte first.
+	input := []byte{
+		0x00, 0x10, // sample 1: 0x1000
+		0x00, 0x20, // sample 2: 0x2000
+		0x00, 0x30, // sample 3: 0x3000
+		0x00, 0x40, // sample 4: 0x4000
+	}
+	// Each sample is expected twice in a row: left channel, then right.
+	want := []byte{
+		0x00, 0x10, 0x00, 0x10,
+		0x00, 0x20, 0x00, 0x20,
+		0x00, 0x30, 0x00, 0x30,
+		0x00, 0x40, 0x00, 0x40,
+	}
+
+	r := &monoToStereoReader{src: bytes.NewReader(input)}
+	got := make([]byte, 16) // room for exactly four stereo frames
+
+	switch count, err := r.Read(got); {
+	case err != nil:
+		t.Fatalf("读取失败: %v", err)
+	case count != 16:
+		t.Fatalf("期望读取16字节，实际读取%d字节", count)
+	}
+
+	// The whole output buffer must match byte for byte.
+	if bytes.Equal(got, want) {
+		return
+	}
+	t.Errorf("立体声转换不正确\n期望: %x\n实际: %x", want, got)
+}
+
+// TestMonoToStereoReaderStreaming drains the reader through a small buffer
+// and checks the total number of stereo bytes produced.
+func TestMonoToStereoReaderStreaming(t *testing.T) {
+	// Fill 1000 bytes of mono input with a repeating 0..255 pattern.
+	input := make([]byte, 1000)
+	for i := range input {
+		input[i] = byte(i % 256)
+	}
+
+	r := &monoToStereoReader{src: bytes.NewReader(input)}
+	chunk := make([]byte, 32) // small on purpose, to force many Read calls
+	var total int
+
+	for done := false; !done; {
+		n, err := r.Read(chunk)
+		total += n
+		switch {
+		case err == io.EOF:
+			done = true
+		case err != nil:
+			t.Fatalf("流式读取失败: %v", err)
+		case n == 0:
+			t.Fatal("读取返回0字节但未EOF")
+		}
+	}
+
+	// Every mono byte comes out twice: 1000 bytes in, 2000 bytes out.
+	const wantTotal = 2000
+	if total != wantTotal {
+		t.Fatalf("期望总共读取%d字节，实际读取%d字节", wantTotal, total)
+	}
+}
+
+// TestMonoToStereoReaderPartialRead feeds three mono samples through output
+// buffers whose sizes are not multiples of four, then expects a bare io.EOF.
+func TestMonoToStereoReaderPartialRead(t *testing.T) {
+	input := []byte{
+		0x00, 0x10, // sample 1
+		0x00, 0x20, // sample 2
+		0x00, 0x30, // sample 3
+	}
+	r := &monoToStereoReader{src: bytes.NewReader(input)}
+
+	// Read 1: a 6-byte buffer has room for one 4-byte stereo frame only, so
+	// exactly one mono sample is consumed.
+	switch n, err := r.Read(make([]byte, 6)); {
+	case err != nil:
+		t.Fatalf("第一次读取失败: %v", err)
+	case n != 4:
+		t.Fatalf("第一次读取期望4字节，实际%d字节", n)
+	}
+
+	// Read 2: a 10-byte buffer has room for two frames, which covers the two
+	// remaining mono samples (8 output bytes).
+	switch n, err := r.Read(make([]byte, 10)); {
+	case err != nil:
+		t.Fatalf("第二次读取失败: %v", err)
+	case n != 8:
+		t.Fatalf("第二次读取期望8字节，实际%d字节", n)
+	}
+
+	// Read 3: the source is drained; only io.EOF with zero bytes is acceptable.
+	switch n, err := r.Read(make([]byte, 10)); {
+	case err != io.EOF:
+		t.Fatalf("第三次读取期望EOF，实际: %v", err)
+	case n != 0:
+		t.Fatalf("第三次读取EOF时期望0字节，实际%d字节", n)
+	}
+}