feat(VoiceInput): improve speech recognition

- Implement 2pass speech recognition, distinguishing online (partial) and offline (final) results
- Add noise-gate processing to improve recognition accuracy
- Optimize the real-time text display logic so online and offline results update dynamically (sketched below)
- Adjust WebSocket message handling to support the different message types
Lexcubia 2025-04-28 11:54:13 +08:00
parent 118c2805ad
commit f3f72ae654
1 changed file with 38 additions and 12 deletions
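
The heart of the change is how recognized text is assembled for display: offline (final) segments are accumulated, the latest online partial is kept separately, and the visible text is derived from both. A minimal sketch of that model, reusing the names from the diff below; note that the diff keeps currentOnlineText as a plain variable, which Vue's reactivity does not track, so this sketch stores it in a ref so the computed re-evaluates when only the partial changes:

    import { ref, computed } from 'vue'

    const finalTexts = ref([])         // confirmed offline (2pass-offline) segments
    const currentOnlineText = ref('')  // latest online (2pass-online) partial result

    // What the user sees: every confirmed segment plus the in-progress partial.
    const realTimeText = computed(() => finalTexts.value.join('') + currentOnlineText.value)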

@@ -66,7 +66,7 @@
</template>
<script setup>
-import { ref, onMounted, onUnmounted } from 'vue'
+import { ref, onMounted, onUnmounted, computed } from 'vue'
import { ElButton, ElMessage, ElIcon } from 'element-plus'
import { Microphone, Check, Close } from '@element-plus/icons-vue'
import Recorder from 'recorder-core'
@@ -94,7 +94,6 @@ const speakStore = useSpeakStore()
const isRecording = ref(false)
const isVoiceToTextConverting = ref(false)
const showVoiceModal = ref(false)
-const realTimeText = ref('')
const remainingTime = ref(30)
const pcmData = ref([])
let recorder = null
@@ -104,6 +103,15 @@ let audioChunks = []
let sampleBuf = new Int16Array(0)
const chunk_size = 960 // 60 ms of audio at a 16 kHz sample rate
//
+const finalTexts = ref([]) // confirmed offline (2pass-offline) results
+let currentOnlineText = '' // latest online (2pass-online) partial result
+// text shown in real time: confirmed segments plus the current partial
+const realTimeText = computed(() => {
+return finalTexts.value.join('') + currentOnlineText
+})
// WebSocket configuration
const wsConfig = {
url: 'wss://fire.lexcubia.com:31003/ws',
@@ -116,9 +124,18 @@ const wsConfig = {
msgHandle: (event) => {
try {
const data = JSON.parse(event.data)
+if (data.mode === '2pass-online') {
+currentOnlineText = data.text || ''
+} else if (data.mode === '2pass-offline' || data.mode === 'offline') {
if (data.text) {
-realTimeText.value += data.text
+finalTexts.value.push(data.text)
}
+currentOnlineText = ''
+}
+// auto-closing the modal on the final result is disabled for now
+// if (data.is_final === true) {
+// closeVoiceModal()
+// }
} catch (error) {
console.error('WebSocket message parse error:', error)
}
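
Walking the new msgHandle through an invented message sequence makes the display behaviour concrete. The payload texts below are placeholders; only the mode and text fields handled above are taken from the diff:

    const messages = [
      { mode: '2pass-online', text: '今天' },           // partial result, shown immediately
      { mode: '2pass-online', text: '今天天气' },        // newer partial replaces the previous one
      { mode: '2pass-offline', text: '今天天气不错。' },  // final segment: pushed to finalTexts, partial cleared
      { mode: '2pass-online', text: '我们' },            // next segment starts streaming
    ]
    // After each message, realTimeText evaluates to:
    // '今天' -> '今天天气' -> '今天天气不错。' -> '今天天气不错。我们'
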
@@ -134,7 +151,6 @@ const openVoiceModal = async () => {
if (props.isDisabled) return
showVoiceModal.value = true
isRecording.value = false
-realTimeText.value = ''
remainingTime.value = 30
audioChunks = []
@@ -182,7 +198,6 @@ const closeVoiceModal = () => {
// 4. Reset recording state
audioChunks = []
-realTimeText.value = ''
remainingTime.value = 30
sampleBuf = new Int16Array(0)
pcmData.value = []
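
Because realTimeText is now computed, the direct resets (realTimeText.value = '') are dropped from openVoiceModal and closeVoiceModal in the two hunks above; clearing the display now means clearing the state it is derived from, which the visible hunks do not show. A minimal sketch of that reset, using a hypothetical helper name:

    // Hypothetical helper: reset the source state instead of assigning to the computed.
    const resetRecognitionText = () => {
      finalTexts.value = []
      currentOnlineText = ''
    }
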
@@ -226,12 +241,22 @@ const startRecording = (stream) => {
// 1. Downsample the 48 kHz input to 16 kHz
const array_48k = [pcm48k]
const data_16k = Recorder.SampleData(array_48k, sampleRate, 16000).data
-// 2. Append the new samples to sampleBuf
-const merged = new Int16Array(sampleBuf.length + data_16k.length)
+// 2. Simple noise gate: zero out samples below the amplitude threshold
+const noiseThreshold = 1000 // amplitude threshold for the gate
+const denoisedData = new Int16Array(data_16k.length)
+for (let i = 0; i < data_16k.length; i++) {
+// treat low-amplitude samples as noise and zero them out
+denoisedData[i] = Math.abs(data_16k[i]) < noiseThreshold ? 0 : data_16k[i]
+}
+// 3. Append the denoised samples to sampleBuf
+const merged = new Int16Array(sampleBuf.length + denoisedData.length)
merged.set(sampleBuf)
-merged.set(data_16k, sampleBuf.length)
+merged.set(denoisedData, sampleBuf.length)
sampleBuf = merged
-// 3. Send full chunks over the WebSocket
+// 4. Send full chunks over the WebSocket
while(sampleBuf.length >= chunk_size) {
const sendBuf = sampleBuf.slice(0, chunk_size)
sampleBuf = sampleBuf.slice(chunk_size)
@@ -239,8 +264,9 @@ const startRecording = (stream) => {
websocketClient.send(sendBuf.buffer)
}
}
-// 4. Keep the 16 kHz PCM data
-pcmData.value = Array.from(data_16k)
+// 5. Keep the denoised 16 kHz PCM data
+pcmData.value = Array.from(denoisedData)
}
}
})
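
Pulled out of the diff, the per-frame audio path in startRecording is: downsample to 16 kHz, apply a hard amplitude gate, buffer the samples, and send fixed 960-sample (60 ms) chunks. A standalone sketch under those assumptions; sendChunk stands in for websocketClient.send and is not from the diff:

    const CHUNK_SIZE = 960        // 960 samples / 16000 Hz = 60 ms per chunk
    const NOISE_THRESHOLD = 1000  // samples below this absolute amplitude are zeroed

    let buffer = new Int16Array(0)

    function processFrame(samples16k, sendChunk) {
      // Hard gate: zero out low-amplitude samples.
      const gated = samples16k.map(s => (Math.abs(s) < NOISE_THRESHOLD ? 0 : s))

      // Append to the rolling buffer.
      const merged = new Int16Array(buffer.length + gated.length)
      merged.set(buffer)
      merged.set(gated, buffer.length)
      buffer = merged

      // Emit fixed-size chunks; keep the remainder for the next frame.
      while (buffer.length >= CHUNK_SIZE) {
        sendChunk(buffer.slice(0, CHUNK_SIZE).buffer)
        buffer = buffer.slice(CHUNK_SIZE)
      }
    }

A hard gate like this silences quiet speech as well as background noise, so the threshold of 1000 is a trade-off rather than true denoising.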