feat(voice): implement voice input

- Add a generic WebSocket utility class
- Integrate the recorder-core library for audio recording
- Add audio waveform rendering and real-time transcript display
- Stream recorded audio to the WebSocket server in real time
- Polish the voice input UI and interaction logic
Lexcubia 2025-04-27 11:51:47 +08:00
parent 6a20e26e5d
commit 15514759e2
4 changed files with 395 additions and 107 deletions
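At a glance, the streaming flow this commit implements reduces to three steps. The sketch below reconstructs them from the handlers in the diffs that follow; the helper names are illustrative, and an already-open WebSocket `ws` is assumed:

// Illustrative helpers reconstructed from _onOpen, onProcess and stopRecording below.

// 1. Right after connecting, send the recognition config as a JSON text frame.
function startSession(ws) {
  ws.send(JSON.stringify({
    mode: '2pass', // real-time partial results plus a second, final pass
    chunk_size: [5, 10, 5],
    chunk_interval: 10,
    wav_name: 'h5',
    is_speaking: true,
    itn: false
  }))
}

// 2. While recording, stream 16 kHz / 16-bit mono PCM chunks as binary frames.
function streamChunk(ws, pcmInt16) {
  ws.send(pcmInt16) // WebSocket.send accepts an ArrayBufferView directly
}

// 3. When recording stops, signal the end of the utterance.
function endSession(ws) {
  ws.send(JSON.stringify({ is_speaking: false }))
}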

package.json

@@ -42,10 +42,13 @@
"echarts": "^5.6.0",
"element-plus": "^2.9.7",
"marked": "^15.0.8",
"pcm": "^1.0.3",
"pinia": "^3.0.2",
"pinia-plugin-persistedstate": "^4.2.0",
"recorder-core": "^1.3.25011100",
"vue": "^3.5.13",
"vue-router": "^4.5.0"
"vue-router": "^4.5.0",
"wav": "^1.0.2"
},
"devDependencies": {
"@types/node": "^20.11.19",

src/utils/websocket.js (new file, 128 lines)

@@ -0,0 +1,128 @@
/**
* Generic WebSocket utility class
* @author Lexcubia
* @date 2024-04-25
*/
class WebSocketClient {
constructor(config) {
this.socket = null;
this.msgHandle = config.msgHandle;
this.stateHandle = config.stateHandle;
this.config = config;
}
/**
* Open a WebSocket connection
* @param {string} url - WebSocket server address
* @returns {boolean} - whether a connection attempt was started (the socket opens asynchronously)
*/
connect(url) {
if (!url) {
console.error('WebSocket URL 不能为空');
return false;
}
if (!/^wss?:\/\//.test(url)) {
console.error('WebSocket URL 格式不正确');
return false;
}
if (!('WebSocket' in window)) {
console.error('当前浏览器不支持 WebSocket');
return false;
}
try {
this.socket = new WebSocket(url);
this.socket.onopen = this._onOpen.bind(this);
this.socket.onclose = this._onClose.bind(this);
this.socket.onmessage = this._onMessage.bind(this);
this.socket.onerror = this._onError.bind(this);
return true;
} catch (error) {
console.error('WebSocket 连接失败:', error);
return false;
}
}
/**
* Close the WebSocket connection
*/
disconnect() {
if (this.socket) {
this.socket.close();
this.socket = null;
}
}
/**
* Send data
* @param {any} data - payload to send; plain objects are JSON-encoded, binary frames are sent as-is
*/
send(data) {
if (!this.socket || this.socket.readyState !== WebSocket.OPEN) {
console.error('WebSocket 未连接');
return;
}
try {
// Only JSON-encode plain objects; pass binary frames (ArrayBuffer,
// typed arrays, Blob) through untouched so audio data is not mangled.
if (typeof data === 'object' && !(data instanceof ArrayBuffer) && !ArrayBuffer.isView(data) && !(data instanceof Blob)) {
data = JSON.stringify(data);
}
this.socket.send(data);
} catch (error) {
console.error('发送数据失败:', error);
}
}
/**
* Callback invoked when the connection opens
* @private
*/
_onOpen() {
console.log('WebSocket 连接成功');
this.stateHandle(0); // 0 = connected
// Send the initial recognition config to start a session
const initConfig = {
chunk_size: [5, 10, 5],
wav_name: 'h5',
is_speaking: true,
chunk_interval: 10,
itn: this.config.itn || false,
mode: this.config.mode || '2pass',
...this.config.initConfig
};
this.send(initConfig);
}
/**
* Callback invoked when the connection closes
* @private
*/
_onClose() {
console.log('WebSocket 连接关闭');
this.stateHandle(1); // 1 = closed
}
/**
* Callback invoked for each incoming message
* @private
*/
_onMessage(event) {
this.msgHandle(event);
}
/**
* Callback invoked on connection errors
* @private
*/
_onError(error) {
console.error('WebSocket 错误:', error);
this.stateHandle(2); // 2 = error
}
}
export default WebSocketClient;
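For context, a minimal usage sketch of the class above; the URL is a placeholder and the handler bodies are illustrative:

import WebSocketClient from '@/utils/websocket'

const client = new WebSocketClient({
  mode: '2pass',
  itn: true,
  msgHandle: (event) => {
    // The server replies with JSON text frames.
    const data = JSON.parse(event.data)
    if (data.text) console.log('transcript:', data.text)
  },
  stateHandle: (state) => {
    // 0 = connected, 1 = closed, 2 = error (see the callbacks above)
    console.log('socket state:', state)
  }
})

// connect() returns false for an empty or malformed URL or an unsupported browser.
if (client.connect('wss://example.com:10096/')) {
  // ...stream audio with client.send(...), then end the utterance:
  // client.send({ is_speaking: false })
  // client.disconnect()
}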


@@ -15,8 +15,16 @@
<!-- Voice Input Modal -->
<div class="voice-input-modal-overlay" v-if="showVoiceModal">
<div class="voice-bubble">
<!-- TODO: Add voice visualizer here -->
<div class="voice-wave-placeholder">语音波形区域</div>
<!-- Voice Visualizer -->
<AudioWaveform :pcmData="pcmData" />
<!-- Timer Display -->
<div class="timer" :class="{ 'warning': remainingTime <= 5 }">
{{ formatTime(remainingTime) }}
</div>
<!-- Real-time Text Display -->
<div class="real-time-text" v-if="realTimeText">
{{ realTimeText }}
</div>
</div>
<div class="voice-modal-controls">
<div class="modal-control-item">
@@ -36,12 +44,12 @@
>
</el-button>
</div>
<div class="modal-control-item">
<!-- <div class="modal-control-item">
<span class="modal-btn-label">转文字</span>
<el-button class="modal-btn text-send-btn" circle @click="sendAsText">
<el-icon><ChatDotSquare /></el-icon>
</el-button>
</div>
</div> -->
</div>
<div class="voice-modal-tip">{{ isRecording ? '正在录音...' : '点击麦克风开始录音' }}</div>
</div>
@@ -61,6 +69,11 @@
import { ref, onMounted, onUnmounted } from 'vue'
import { ElButton, ElMessage, ElIcon } from 'element-plus'
import { Microphone, ChatDotSquare, Close } from '@element-plus/icons-vue'
import Recorder from 'recorder-core'
import 'recorder-core/src/engine/pcm'
import 'recorder-core/src/engine/wav'
import WebSocketClient from '@/utils/websocket'
import AudioWaveform from '@/components/AudioWaveform.vue'
// Props definition
const props = defineProps({
@@ -71,137 +84,224 @@ const props = defineProps({
})
// Emits definition
const emit = defineEmits(['sendText'])
const emit = defineEmits(['sendText', 'sendVoice'])
// Refs related to voice input
const isRecording = ref(false)
const isVoiceToTextConverting = ref(false)
const showVoiceModal = ref(false)
let mediaRecorder = null
const realTimeText = ref('')
const remainingTime = ref(30)
const pcmData = ref([])
let recorder = null
let websocketClient = null
let timer = null
let audioChunks = []
// --- Voice Input Logic ---
const openVoiceModal = () => {
if (props.isDisabled) return;
showVoiceModal.value = true;
// Reset state in case modal was closed unexpectedly before
isRecording.value = false;
audioChunks = [];
}
const closeVoiceModal = () => {
showVoiceModal.value = false;
if (isRecording.value) {
stopRecording(); // Stop recording if modal is closed while recording
isRecording.value = false;
// WebSocket configuration
const wsConfig = {
url: 'wss://160.202.224.52:10096/',
initConfig: {
mode: '2pass',
wav_format: 'pcm',
chunk_size: [5, 10, 5],
itn: true
},
msgHandle: (event) => {
try {
const data = JSON.parse(event.data)
if (data.text) {
realTimeText.value = data.text
}
} catch (error) {
console.error('WebSocket message parse error:', error)
}
},
stateHandle: (state) => {
console.log('WebSocket state:', state)
}
}
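// For reference, a single incoming recognition result might look like the
// sketch below; msgHandle above only reads `text`, so every other field is
// an assumption about the server's payload shape.
// const exampleMessage = {
//   mode: '2pass-online', // assumed: which recognition pass produced it
//   wav_name: 'h5',
//   text: 'partial transcript', // the field rendered in real time
//   is_final: false // assumed: whether this segment is final
// }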
// --- Voice Input Logic ---
const openVoiceModal = async () => {
if (props.isDisabled) return
showVoiceModal.value = true
isRecording.value = false
realTimeText.value = ''
remainingTime.value = 30
audioChunks = []
// Request microphone access, then start recording and streaming
try {
const stream = await navigator.mediaDevices.getUserMedia({ audio: true })
startRecording(stream)
isRecording.value = true
startTimer()
connectWebSocket()
} catch (err) {
ElMessage.error('无法访问麦克风,请检查权限设置')
console.error('获取麦克风权限失败:', err)
closeVoiceModal()
}
}
const closeVoiceModal = () => {
showVoiceModal.value = false
if (isRecording.value) {
stopRecording()
isRecording.value = false
}
clearInterval(timer)
// Release the recorder and its media stream
if (recorder) {
recorder.close(() => {
// Stop all tracks of the captured media stream
if (recorder.stream) {
recorder.stream.getTracks().forEach(track => track.stop())
}
recorder = null
})
}
// Tear down the WebSocket connection
if (websocketClient) {
websocketClient.disconnect()
websocketClient = null
}
// Reset transient UI state
audioChunks = []
realTimeText.value = ''
remainingTime.value = 30
}
const cancelVoiceInputFromModal = () => {
closeVoiceModal();
closeVoiceModal()
}
const confirmVoiceInput = async () => {
if (!isRecording.value) {
// Start recording
try {
const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
startRecording(stream);
isRecording.value = true;
} catch (err) {
ElMessage.error('无法访问麦克风,请检查权限设置');
console.error('获取麦克风权限失败:', err);
closeVoiceModal(); // Close modal on error
}
} else {
// Stop recording and process
stopRecording();
isRecording.value = false;
if (isRecording.value) {
stopRecording()
isRecording.value = false
clearInterval(timer)
if (audioChunks.length === 0) {
ElMessage.warning('录音时间太短');
closeVoiceModal();
return;
ElMessage.warning('录音时间太短')
closeVoiceModal()
return
}
isVoiceToTextConverting.value = true; // Show converting overlay
closeVoiceModal(); // Close the modal after stopping
// Simulate conversion & emit result
const audioBlob = new Blob(audioChunks, { type: 'audio/wav' });
console.log('录音完成,模拟转换,音频大小:', audioBlob.size);
setTimeout(() => {
const simulatedText = "模拟语音识别结果" + Date.now(); // Example text
emit('sendText', simulatedText);
isVoiceToTextConverting.value = false;
audioChunks = []; // Clear chunks after processing
}, 1500);
const audioBlob = new Blob(audioChunks, { type: 'audio/wav' })
emit('sendVoice', audioBlob)
closeVoiceModal()
}
}
const sendAsText = () => {
// Currently, just acts like confirm/stop
if (isRecording.value) {
confirmVoiceInput();
stopRecording()
isRecording.value = false
clearInterval(timer)
if (realTimeText.value) {
emit('sendText', realTimeText.value)
} else {
ElMessage.warning('没有识别到文字')
}
closeVoiceModal()
} else {
// If not recording, maybe do nothing or just close?
closeVoiceModal();
closeVoiceModal()
}
}
const startRecording = (stream) => {
audioChunks = []; // Clear previous chunks
mediaRecorder = new MediaRecorder(stream);
mediaRecorder.ondataavailable = (event) => {
if (event.data.size > 0) {
audioChunks.push(event.data);
audioChunks = []
recorder = new Recorder({
type: 'pcm', // capture raw PCM frames
sampleRate: 16000, // 16 kHz sample rate
bitRate: 16, // 16-bit samples
onProcess: (buffers, powerLevel, duration, sampleRate) => {
const pcm = buffers[buffers.length - 1]
if (pcm && pcm.length > 0) {
pcmData.value = Array.from(pcm)
}
// Stream the newest PCM chunk to the server in real time. Send it as a
// binary frame: JSON-encoding an Int16Array would garble the audio.
if (websocketClient && websocketClient.socket && websocketClient.socket.readyState === WebSocket.OPEN) {
const pcmDataSend = buffers[buffers.length - 1]
websocketClient.send(pcmDataSend)
}
}
};
mediaRecorder.onstop = () => {
// Stop associated media stream tracks
mediaRecorder.stream.getTracks().forEach(track => track.stop());
// Note: Actual processing is now triggered in confirmVoiceInput after calling stopRecording
};
})
mediaRecorder.onerror = (event) => {
console.error('MediaRecorder error:', event.error);
ElMessage.error('录音出错');
isRecording.value = false; // Reset state on error
closeVoiceModal();
};
mediaRecorder.start();
recorder.open(() => {
recorder.start()
}, (msg, isUserNotAllow) => {
ElMessage.error('录音失败: ' + msg)
closeVoiceModal()
})
}
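// Note on the onProcess callback above: recorder-core passes the cumulative
// list of PCM buffers, so buffers[buffers.length - 1] is just the newest
// chunk, an Int16Array at the configured sample rate. Equivalent sketch:
// const onProcess = (buffers, powerLevel, duration, sampleRate) => {
//   const newest = buffers[buffers.length - 1]
//   console.log(`${newest.length} samples @ ${sampleRate} Hz`)
// }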
const stopRecording = () => {
if (mediaRecorder && mediaRecorder.state === 'recording') {
try {
mediaRecorder.stop();
} catch (error) {
console.error("Error stopping MediaRecorder:", error);
// Attempt to stop tracks directly as a fallback
if (mediaRecorder.stream) {
mediaRecorder.stream.getTracks().forEach(track => track.stop());
if (recorder) {
recorder.stop((blob, duration) => {
// Convert PCM to WAV before saving
Recorder.pcm2wav({
blob: blob,
sampleRate: 16000,
bitRate: 16
}, (wavBlob) => {
audioChunks.push(wavBlob)
// Send end signal to WebSocket
if (websocketClient && websocketClient.socket.readyState === WebSocket.OPEN) {
websocketClient.send({
is_speaking: false
})
}
}
}, (msg) => {
console.error('PCM转WAV失败:', msg)
})
}, (msg) => {
console.error('录音停止失败:', msg)
})
}
// Reset recorder instance
mediaRecorder = null;
}
const startTimer = () => {
timer = setInterval(() => {
remainingTime.value--
if (remainingTime.value <= 0) {
stopRecording()
isRecording.value = false
clearInterval(timer)
}
}, 1000)
}
const formatTime = (seconds) => {
const mins = Math.floor(seconds / 60)
const secs = seconds % 60
return `${mins}:${secs.toString().padStart(2, '0')}`
}
const connectWebSocket = () => {
websocketClient = new WebSocketClient(wsConfig)
websocketClient.connect(wsConfig.url)
}
// --- Lifecycle hooks ---
onMounted(() => {
if (!('MediaRecorder' in window) || !navigator.mediaDevices) {
console.warn('浏览器不支持语音录制功能');
// Optionally disable voice functionality entirely
console.warn('浏览器不支持语音录制功能')
}
})
onUnmounted(() => {
stopRecording(); // Ensure recording is stopped on component unmount
closeVoiceModal()
})
</script>
@@ -247,8 +347,40 @@ onUnmounted(() => {
text-align: center;
position: relative;
.voice-wave-placeholder {
.voice-wave {
width: 100%;
height: 48px;
display: flex;
align-items: center;
justify-content: center;
margin: 10px 0;
background: transparent;
}
.voice-wave canvas {
width: 240px;
height: 48px;
display: block;
background: transparent;
}
.timer {
font-size: 14px;
color: white;
margin: 5px 0;
&.warning {
color: #ff4d4f;
animation: blink 1s infinite;
}
}
.real-time-text {
font-size: 14px;
color: white;
margin: 10px 0;
min-height: 20px;
word-break: break-all;
}
&::after {
@@ -395,4 +527,29 @@ html.dark .voice-to-text-content {
html.dark .converting-text {
color: #eee;
}
.timer {
font-size: 14px;
color: white;
margin: 5px 0;
&.warning {
color: #ff4d4f;
animation: blink 1s infinite;
}
}
.real-time-text {
font-size: 14px;
color: white;
margin: 10px 0;
min-height: 20px;
word-break: break-all;
}
@keyframes blink {
0% { opacity: 1; }
50% { opacity: 0.5; }
100% { opacity: 1; }
}
</style>

vite.config.js

@@ -58,21 +58,21 @@ export default defineConfig(({ command, mode }) => {
rewrite: (path) => path.replace(/^\/api/, ''),
secure: false,
configure: (proxy, options) => {
proxy.on('proxyReq', (proxyReq, req, res) => {
const targetOrigin = new URL(env.VITE_APP_BASE_API).origin;
proxyReq.setHeader('Origin', targetOrigin);
// proxy.on('proxyReq', (proxyReq, req, res) => {
// const targetOrigin = new URL(env.VITE_APP_BASE_API).origin;
// proxyReq.setHeader('Origin', targetOrigin);
if (req.originalUrl && req.originalUrl.includes('chat-messages')) {
proxyReq.setHeader('Accept', 'text/event-stream');
}
});
proxy.on('proxyRes', (proxyRes, req, res) => {
if (req.originalUrl && req.originalUrl.includes('chat-messages')) {
proxyRes.headers['content-type'] = 'text/event-stream';
proxyRes.headers['Cache-Control'] = 'no-cache';
proxyRes.headers['Connection'] = 'keep-alive';
}
});
// if (req.originalUrl && req.originalUrl.includes('chat-messages')) {
// proxyReq.setHeader('Accept', 'text/event-stream');
// }
// });
// proxy.on('proxyRes', (proxyRes, req, res) => {
// if (req.originalUrl && req.originalUrl.includes('chat-messages')) {
// proxyRes.headers['content-type'] = 'text/event-stream';
// proxyRes.headers['Cache-Control'] = 'no-cache';
// proxyRes.headers['Connection'] = 'keep-alive';
// }
// });
}
}
}