feat(VoiceInput): 优化语音识别功能
- 实现 2pass 语音识别,区分在线和离线识别结果
- 添加语音降噪处理,提高识别准确性
- 优化实时文本显示逻辑,支持在线和离线结果的动态更新
- 调整 WebSocket 消息处理,支持不同类型的消息
This commit is contained in:
parent
118c2805ad
commit
f3f72ae654
|
@ -66,7 +66,7 @@
|
||||||
</template>
|
</template>
|
||||||
|
|
||||||
<script setup>
|
<script setup>
|
||||||
import { ref, onMounted, onUnmounted } from 'vue'
|
import { ref, onMounted, onUnmounted, computed } from 'vue'
|
||||||
import { ElButton, ElMessage, ElIcon } from 'element-plus'
|
import { ElButton, ElMessage, ElIcon } from 'element-plus'
|
||||||
import { Microphone, Check, Close } from '@element-plus/icons-vue'
|
import { Microphone, Check, Close } from '@element-plus/icons-vue'
|
||||||
import Recorder from 'recorder-core'
|
import Recorder from 'recorder-core'
|
||||||
|
@ -94,7 +94,6 @@ const speakStore = useSpeakStore()
|
||||||
const isRecording = ref(false)
|
const isRecording = ref(false)
|
||||||
const isVoiceToTextConverting = ref(false)
|
const isVoiceToTextConverting = ref(false)
|
||||||
const showVoiceModal = ref(false)
|
const showVoiceModal = ref(false)
|
||||||
const realTimeText = ref('')
|
|
||||||
const remainingTime = ref(30)
|
const remainingTime = ref(30)
|
||||||
const pcmData = ref([])
|
const pcmData = ref([])
|
||||||
let recorder = null
|
let recorder = null
|
||||||
|
@ -104,6 +103,15 @@ let audioChunks = []
|
||||||
let sampleBuf = new Int16Array(0)
|
let sampleBuf = new Int16Array(0)
|
||||||
const chunk_size = 960 // 16kHz下60ms一帧
|
const chunk_size = 960 // 16kHz下60ms一帧
|
||||||
|
|
||||||
|
// 语音识别内容管理
|
||||||
|
const finalTexts = ref([]) // 存储所有offline内容
|
||||||
|
let currentOnlineText = '' // 当前online内容
|
||||||
|
|
||||||
|
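// Text shown to the user: all finalized (offline) segments joined together, followed by the current online partial result.
// NOTE(review): currentOnlineText is a plain `let`, not a ref. A Vue computed is only re-evaluated when one of its reactive dependencies changes, so an update that touches only currentOnlineText may not refresh the displayed text — confirm, and consider making it a ref.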
|
||||||
|
const realTimeText = computed(() => {
|
||||||
|
return finalTexts.value.join('') + currentOnlineText
|
||||||
|
})
|
||||||
|
|
||||||
// WebSocket configuration
|
// WebSocket configuration
|
||||||
const wsConfig = {
|
const wsConfig = {
|
||||||
url: 'wss://fire.lexcubia.com:31003/ws',
|
url: 'wss://fire.lexcubia.com:31003/ws',
|
||||||
|
@ -116,9 +124,18 @@ const wsConfig = {
|
||||||
msgHandle: (event) => {
|
msgHandle: (event) => {
|
||||||
try {
|
try {
|
||||||
const data = JSON.parse(event.data)
|
const data = JSON.parse(event.data)
|
||||||
if (data.text) {
|
if (data.mode === '2pass-online') {
|
||||||
realTimeText.value += data.text
|
currentOnlineText = data.text || ''
|
||||||
|
} else if (data.mode === '2pass-offline' || data.mode === 'offline') {
|
||||||
|
if (data.text) {
|
||||||
|
finalTexts.value.push(data.text)
|
||||||
|
}
|
||||||
|
currentOnlineText = ''
|
||||||
}
|
}
|
||||||
|
// 处理整体结束
|
||||||
|
// if (data.is_final === true) {
|
||||||
|
// closeVoiceModal()
|
||||||
|
// }
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error('WebSocket message parse error:', error)
|
console.error('WebSocket message parse error:', error)
|
||||||
}
|
}
|
||||||
|
@ -134,7 +151,6 @@ const openVoiceModal = async () => {
|
||||||
if (props.isDisabled) return
|
if (props.isDisabled) return
|
||||||
showVoiceModal.value = true
|
showVoiceModal.value = true
|
||||||
isRecording.value = false
|
isRecording.value = false
|
||||||
realTimeText.value = ''
|
|
||||||
remainingTime.value = 30
|
remainingTime.value = 30
|
||||||
audioChunks = []
|
audioChunks = []
|
||||||
|
|
||||||
|
@ -182,7 +198,6 @@ const closeVoiceModal = () => {
|
||||||
|
|
||||||
// 4. 清理数据
|
// 4. 清理数据
|
||||||
audioChunks = []
|
audioChunks = []
|
||||||
realTimeText.value = ''
|
|
||||||
remainingTime.value = 30
|
remainingTime.value = 30
|
||||||
sampleBuf = new Int16Array(0)
|
sampleBuf = new Int16Array(0)
|
||||||
pcmData.value = []
|
pcmData.value = []
|
||||||
|
@ -226,12 +241,22 @@ const startRecording = (stream) => {
|
||||||
// 1. 重采样为16kHz
|
// 1. 重采样为16kHz
|
||||||
const array_48k = [pcm48k]
|
const array_48k = [pcm48k]
|
||||||
const data_16k = Recorder.SampleData(array_48k, sampleRate, 16000).data
|
const data_16k = Recorder.SampleData(array_48k, sampleRate, 16000).data
|
||||||
// 2. 拼接到sampleBuf
|
|
||||||
const merged = new Int16Array(sampleBuf.length + data_16k.length)
|
// 2. 降噪处理
|
||||||
|
const noiseThreshold = 1000 // 噪声阈值,可以根据实际情况调整
|
||||||
|
const denoisedData = new Int16Array(data_16k.length)
|
||||||
|
for (let i = 0; i < data_16k.length; i++) {
|
||||||
|
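// Hard amplitude gate: any sample whose absolute value is below noiseThreshold is treated as noise and replaced with 0.
// NOTE(review): this also zeroes the low-amplitude portions of genuine speech (quiet passages, samples near zero crossings), which may distort the audio sent for recognition — confirm the threshold against real recordings.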
|
||||||
|
denoisedData[i] = Math.abs(data_16k[i]) < noiseThreshold ? 0 : data_16k[i]
|
||||||
|
}
|
||||||
|
|
||||||
|
// 3. 拼接到sampleBuf
|
||||||
|
const merged = new Int16Array(sampleBuf.length + denoisedData.length)
|
||||||
merged.set(sampleBuf)
|
merged.set(sampleBuf)
|
||||||
merged.set(data_16k, sampleBuf.length)
|
merged.set(denoisedData, sampleBuf.length)
|
||||||
sampleBuf = merged
|
sampleBuf = merged
|
||||||
// 3. 分片发送
|
|
||||||
|
// 4. 分片发送
|
||||||
while(sampleBuf.length >= chunk_size) {
|
while(sampleBuf.length >= chunk_size) {
|
||||||
const sendBuf = sampleBuf.slice(0, chunk_size)
|
const sendBuf = sampleBuf.slice(0, chunk_size)
|
||||||
sampleBuf = sampleBuf.slice(chunk_size)
|
sampleBuf = sampleBuf.slice(chunk_size)
|
||||||
|
@ -239,8 +264,9 @@ const startRecording = (stream) => {
|
||||||
websocketClient.send(sendBuf.buffer)
|
websocketClient.send(sendBuf.buffer)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// 4. 波形显示
|
|
||||||
pcmData.value = Array.from(data_16k)
|
// 5. 波形显示
|
||||||
|
pcmData.value = Array.from(denoisedData)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
|
|
Loading…
Reference in New Issue