feat(VoiceInput): improve speech recognition

- Implement 2pass speech recognition, distinguishing online (partial) and offline (final) results
- Add noise-gate processing to improve recognition accuracy
- Optimize the real-time text display logic so online and offline results update dynamically (sketched below)
- Adjust WebSocket message handling to support the different message types
Lexcubia 2025-04-28 11:54:13 +08:00
parent 118c2805ad
commit f3f72ae654
1 changed file with 38 additions and 12 deletions
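
The heart of the change is how recognized text is assembled for display: offline (final) segments are accumulated, the latest online partial is kept separately, and the visible text is derived from both. A minimal sketch of that model, reusing the names from the diff below; note that the diff keeps currentOnlineText as a plain variable, which Vue's reactivity does not track, so this sketch stores it in a ref so the computed re-evaluates when only the partial changes:

    import { ref, computed } from 'vue'

    const finalTexts = ref([])         // confirmed offline (2pass-offline) segments
    const currentOnlineText = ref('')  // latest online (2pass-online) partial result

    // What the user sees: every confirmed segment plus the in-progress partial.
    const realTimeText = computed(() => finalTexts.value.join('') + currentOnlineText.value)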

@@ -66,7 +66,7 @@
</template>
<script setup>
-import { ref, onMounted, onUnmounted } from 'vue'
+import { ref, onMounted, onUnmounted, computed } from 'vue'
import { ElButton, ElMessage, ElIcon } from 'element-plus'
import { Microphone, Check, Close } from '@element-plus/icons-vue'
import Recorder from 'recorder-core'
@@ -94,7 +94,6 @@ const speakStore = useSpeakStore()
const isRecording = ref(false)
const isVoiceToTextConverting = ref(false)
const showVoiceModal = ref(false)
-const realTimeText = ref('')
const remainingTime = ref(30)
const pcmData = ref([])
let recorder = null
@@ -104,6 +103,15 @@ let audioChunks = []
let sampleBuf = new Int16Array(0)
const chunk_size = 960 // 60 ms of audio at a 16 kHz sample rate
//
+const finalTexts = ref([]) // confirmed offline (2pass-offline) results
+let currentOnlineText = '' // latest online (2pass-online) partial result
+// text shown in real time: confirmed segments plus the current partial
+const realTimeText = computed(() => {
+return finalTexts.value.join('') + currentOnlineText
+})
// WebSocket configuration
const wsConfig = {
url: 'wss://fire.lexcubia.com:31003/ws',
@@ -116,9 +124,18 @@ const wsConfig = {
msgHandle: (event) => {
try {
const data = JSON.parse(event.data)
+if (data.mode === '2pass-online') {
+currentOnlineText = data.text || ''
+} else if (data.mode === '2pass-offline' || data.mode === 'offline') {
if (data.text) {
-realTimeText.value += data.text
+finalTexts.value.push(data.text)
}
+currentOnlineText = ''
+}
+// auto-closing the modal on the final result is disabled for now
+// if (data.is_final === true) {
+// closeVoiceModal()
+// }
} catch (error) {
console.error('WebSocket message parse error:', error)
}
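
Walking the new msgHandle through an invented message sequence makes the display behaviour concrete. The payload texts below are placeholders; only the mode and text fields handled above are taken from the diff:

    const messages = [
      { mode: '2pass-online', text: '今天' },           // partial result, shown immediately
      { mode: '2pass-online', text: '今天天气' },        // newer partial replaces the previous one
      { mode: '2pass-offline', text: '今天天气不错。' },  // final segment: pushed to finalTexts, partial cleared
      { mode: '2pass-online', text: '我们' },            // next segment starts streaming
    ]
    // After each message, realTimeText evaluates to:
    // '今天' -> '今天天气' -> '今天天气不错。' -> '今天天气不错。我们'
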
@@ -134,7 +151,6 @@ const openVoiceModal = async () => {
if (props.isDisabled) return
showVoiceModal.value = true
isRecording.value = false
-realTimeText.value = ''
remainingTime.value = 30
audioChunks = []
@@ -182,7 +198,6 @@ const closeVoiceModal = () => {
// 4. Reset recording state
audioChunks = []
-realTimeText.value = ''
remainingTime.value = 30
sampleBuf = new Int16Array(0)
pcmData.value = []
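
Because realTimeText is now computed, the direct resets (realTimeText.value = '') are dropped from openVoiceModal and closeVoiceModal in the two hunks above; clearing the display now means clearing the state it is derived from, which the visible hunks do not show. A minimal sketch of that reset, using a hypothetical helper name:

    // Hypothetical helper: reset the source state instead of assigning to the computed.
    const resetRecognitionText = () => {
      finalTexts.value = []
      currentOnlineText = ''
    }
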
@@ -226,12 +241,22 @@ const startRecording = (stream) => {
// 1. Downsample the 48 kHz input to 16 kHz
const array_48k = [pcm48k]
const data_16k = Recorder.SampleData(array_48k, sampleRate, 16000).data
-// 2. Append the new samples to sampleBuf
-const merged = new Int16Array(sampleBuf.length + data_16k.length)
+// 2. Simple noise gate: zero out samples below the amplitude threshold
+const noiseThreshold = 1000 // amplitude threshold for the gate
+const denoisedData = new Int16Array(data_16k.length)
+for (let i = 0; i < data_16k.length; i++) {
+// treat low-amplitude samples as noise and zero them out
+denoisedData[i] = Math.abs(data_16k[i]) < noiseThreshold ? 0 : data_16k[i]
+}
+// 3. Append the denoised samples to sampleBuf
+const merged = new Int16Array(sampleBuf.length + denoisedData.length)
merged.set(sampleBuf)
-merged.set(data_16k, sampleBuf.length)
+merged.set(denoisedData, sampleBuf.length)
sampleBuf = merged
-// 3. Send full chunks over the WebSocket
+// 4. Send full chunks over the WebSocket
while(sampleBuf.length >= chunk_size) {
const sendBuf = sampleBuf.slice(0, chunk_size)
sampleBuf = sampleBuf.slice(chunk_size)
@@ -239,8 +264,9 @@ const startRecording = (stream) => {
websocketClient.send(sendBuf.buffer)
}
}
-// 4. Keep the 16 kHz PCM data
-pcmData.value = Array.from(data_16k)
+// 5. Keep the denoised 16 kHz PCM data
+pcmData.value = Array.from(denoisedData)
}
}
})
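
Pulled out of the diff, the per-frame audio path in startRecording is: downsample to 16 kHz, apply a hard amplitude gate, buffer the samples, and send fixed 960-sample (60 ms) chunks. A standalone sketch under those assumptions; sendChunk stands in for websocketClient.send and is not from the diff:

    const CHUNK_SIZE = 960        // 960 samples / 16000 Hz = 60 ms per chunk
    const NOISE_THRESHOLD = 1000  // samples below this absolute amplitude are zeroed

    let buffer = new Int16Array(0)

    function processFrame(samples16k, sendChunk) {
      // Hard gate: zero out low-amplitude samples.
      const gated = samples16k.map(s => (Math.abs(s) < NOISE_THRESHOLD ? 0 : s))

      // Append to the rolling buffer.
      const merged = new Int16Array(buffer.length + gated.length)
      merged.set(buffer)
      merged.set(gated, buffer.length)
      buffer = merged

      // Emit fixed-size chunks; keep the remainder for the next frame.
      while (buffer.length >= CHUNK_SIZE) {
        sendChunk(buffer.slice(0, CHUNK_SIZE).buffer)
        buffer = buffer.slice(CHUNK_SIZE)
      }
    }

A hard gate like this silences quiet speech as well as background noise, so the threshold of 1000 is a trade-off rather than true denoising.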