feat(VoiceInput): 优化语音识别功能

- 实现 2pass 语音识别，区分在线和离线识别结果
- 添加语音降噪处理，提高识别准确性
- 优化实时文本显示逻辑，支持在线和离线结果的动态更新
- 调整 WebSocket 消息处理，支持不同类型的消息
2025-04-28 11:54:13 +08:00 · 2025-04-28 11:54:13 +08:00 · f3f72ae654
parent 118c2805ad
commit f3f72ae654
1 changed files with 38 additions and 12 deletions
--- a/src/views/chat/components/VoiceInput.vue
+++ b/src/views/chat/components/VoiceInput.vue
@ -66,7 +66,7 @@
 </template>
 <script setup>
-import { ref, onMounted, onUnmounted } from 'vue'
+import { ref, onMounted, onUnmounted, computed } from 'vue'
 import { ElButton, ElMessage, ElIcon } from 'element-plus'
 import { Microphone, Check, Close } from '@element-plus/icons-vue'
 import Recorder from 'recorder-core'
@ -94,7 +94,6 @@ const speakStore = useSpeakStore()
 // Reactive flags for the voice-input flow. openVoiceModal sets showVoiceModal to true
 // and isRecording to false before a session starts.
 const isRecording = ref(false)
 const isVoiceToTextConverting = ref(false)
 const showVoiceModal = ref(false)
 // NOTE(review): this hunk's header (-94,7 +94,6) drops exactly one line, and a computed
 // named `realTimeText` is declared further down in this same patch. Presumably this ref
 // is the removed line; two `const realTimeText` declarations cannot coexist in one
 // scope. Confirm against the actual file.
 const realTimeText = ref('')
 // Reset to 30 when the modal is opened and when it is closed.
 // Presumably a countdown in seconds; confirm against the timer code.
 const remainingTime = ref(30)
 // Copy of the most recent processed sample block, kept for the waveform display;
 // cleared when the modal closes.
 const pcmData = ref([])
 let recorder = null
@ -104,6 +103,15 @@ let audioChunks = []
 // Samples that have been resampled but not yet sent. The recording callback appends
 // to this buffer and drains it in chunk_size slices.
 let sampleBuf = new Int16Array(0)
 const chunk_size = 960 // 960 samples = one 60 ms frame at 16 kHz
 // Speech-recognition text state
 const finalTexts = ref([]) // every finalized (offline-pass) segment, in arrival order
 let currentOnlineText = '' // latest provisional (online-pass) text
 // Text shown to the user: all finalized segments joined, followed by the provisional text.
 // NOTE(review): currentOnlineText is a plain `let`, not a ref, so this computed cannot
 // track it. Only changes to finalTexts invalidate the cached value, which means an
 // online-pass update on its own will likely not refresh the display. Making
 // currentOnlineText a ref (and writing `.value` in the WebSocket handler) would fix this.
 const realTimeText = computed(() => {
  return finalTexts.value.join('') + currentOnlineText
 })
 // WebSocket configuration
 const wsConfig = {
  url: 'wss://fire.lexcubia.com:31003/ws',
@ -116,9 +124,18 @@ const wsConfig = {
  msgHandle: (event) => {
    try {
      const data = JSON.parse(event.data)
-      if (data.text) {
+      if (data.mode === '2pass-online') {
-        realTimeText.value += data.text
+        currentOnlineText = data.text || ''
      } else if (data.mode === '2pass-offline' || data.mode === 'offline') {
        if (data.text) {
          finalTexts.value.push(data.text)
        }
        currentOnlineText = ''
      }
      // 处理整体结束
      // if (data.is_final === true) {
      //   closeVoiceModal()
      // }
    } catch (error) {
      console.error('WebSocket message parse error:', error)
    }
@ -134,7 +151,6 @@ const openVoiceModal = async () => {
  if (props.isDisabled) return
  showVoiceModal.value = true
  isRecording.value = false
  realTimeText.value = ''
  remainingTime.value = 30
  audioChunks = []
@ -182,7 +198,6 @@ const closeVoiceModal = () => {
  // 4. 清理数据
  audioChunks = []
  realTimeText.value = ''
  remainingTime.value = 30
  sampleBuf = new Int16Array(0)
  pcmData.value = []
@ -226,12 +241,22 @@ const startRecording = (stream) => {
        // 1. 重采样为16kHz
        const array_48k = [pcm48k]
        const data_16k = Recorder.SampleData(array_48k, sampleRate, 16000).data
-        // 2. 拼接到sampleBuf
+        
-        const merged = new Int16Array(sampleBuf.length + data_16k.length)
+        // 2. 降噪处理
        const noiseThreshold = 1000 // 噪声阈值，可以根据实际情况调整
        const denoisedData = new Int16Array(data_16k.length)
        for (let i = 0; i < data_16k.length; i++) {
          // 如果样本值小于阈值，则认为是噪声，将其置为0
          denoisedData[i] = Math.abs(data_16k[i]) < noiseThreshold ? 0 : data_16k[i]
        }
        // 3. 拼接到sampleBuf
        const merged = new Int16Array(sampleBuf.length + denoisedData.length)
        merged.set(sampleBuf)
-        merged.set(data_16k, sampleBuf.length)
+        merged.set(denoisedData, sampleBuf.length)
        sampleBuf = merged
-        // 3. 分片发送
+        
        // 4. 分片发送
        while(sampleBuf.length >= chunk_size) {
          const sendBuf = sampleBuf.slice(0, chunk_size)
          sampleBuf = sampleBuf.slice(chunk_size)
@ -239,8 +264,9 @@ const startRecording = (stream) => {
            websocketClient.send(sendBuf.buffer)
          }
        }
-        // 4. 波形显示
+        
-        pcmData.value = Array.from(data_16k)
+        // 5. 波形显示
        pcmData.value = Array.from(denoisedData)
      }
    }
  })