feat(voice): implement voice input

- Add a generic WebSocket utility class
- Integrate the recorder-core library for audio recording
- Add audio waveform display and real-time transcript display
- Stream voice data to the WebSocket server in real time
- Refine the voice input UI and interaction logic
Lexcubia 2025-04-27 11:51:47 +08:00
parent 6a20e26e5d
commit 15514759e2
4 changed files with 395 additions and 107 deletions
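At a high level, the changes below implement a simple streaming-recognition exchange: when the socket opens the client sends a JSON handshake (chunk size, mode, ITN flag), PCM frames from the recorder are forwarded while the user speaks, and an is_speaking: false message marks the end of the utterance. A minimal sketch of that message sequence, assuming a bare WebSocket and a placeholder server URL; the real implementation lives in src/utils/websocket.js and the voice input component:

    // Sketch only; mirrors the message flow added in this commit. The URL is a placeholder.
    const ws = new WebSocket('wss://asr.example.com/')

    ws.onopen = () => {
      // Handshake sent once the connection opens (see WebSocketClient._onOpen below)
      ws.send(JSON.stringify({
        chunk_size: [5, 10, 5],
        wav_name: 'h5',
        is_speaking: true,
        chunk_interval: 10,
        mode: '2pass',
        itn: false
      }))
    }

    // While recording, each PCM frame produced by recorder-core is forwarded as it arrives
    const sendFrame = (pcmFrame) => {
      ws.send(JSON.stringify({ is_speaking: true, audio_data: pcmFrame }))
    }

    // When recording stops, signal the end of the utterance
    const endUtterance = () => {
      ws.send(JSON.stringify({ is_speaking: false }))
    }

    ws.onmessage = (event) => {
      // The server streams back incremental recognition results
      const { text } = JSON.parse(event.data)
      if (text) console.log('partial transcript:', text)
    }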

package.json

@@ -42,10 +42,13 @@
     "echarts": "^5.6.0",
     "element-plus": "^2.9.7",
     "marked": "^15.0.8",
+    "pcm": "^1.0.3",
     "pinia": "^3.0.2",
     "pinia-plugin-persistedstate": "^4.2.0",
+    "recorder-core": "^1.3.25011100",
     "vue": "^3.5.13",
-    "vue-router": "^4.5.0"
+    "vue-router": "^4.5.0",
+    "wav": "^1.0.2"
   },
   "devDependencies": {
     "@types/node": "^20.11.19",

src/utils/websocket.js (new file, 128 lines)

@@ -0,0 +1,128 @@
/**
 * Generic WebSocket utility class
 * @author Lexcubia
 * @date 2024-04-25
 */
class WebSocketClient {
  constructor(config) {
    this.socket = null;
    this.msgHandle = config.msgHandle;
    this.stateHandle = config.stateHandle;
    this.config = config;
  }

  /**
   * Open the WebSocket connection
   * @param {string} url - WebSocket server address
   * @returns {boolean} - whether the connection attempt succeeded
   */
  connect(url) {
    if (!url) {
      console.error('WebSocket URL 不能为空');
      return false;
    }
    if (!url.match(/wss?:\S*/)) {
      console.error('WebSocket URL 格式不正确');
      return false;
    }
    if (!('WebSocket' in window)) {
      console.error('当前浏览器不支持 WebSocket');
      return false;
    }
    try {
      this.socket = new WebSocket(url);
      this.socket.onopen = this._onOpen.bind(this);
      this.socket.onclose = this._onClose.bind(this);
      this.socket.onmessage = this._onMessage.bind(this);
      this.socket.onerror = this._onError.bind(this);
      return true;
    } catch (error) {
      console.error('WebSocket 连接失败:', error);
      return false;
    }
  }

  /**
   * Close the WebSocket connection
   */
  disconnect() {
    if (this.socket) {
      this.socket.close();
      this.socket = null;
    }
  }

  /**
   * Send data over the socket
   * @param {any} data - data to send; objects are JSON-stringified first
   */
  send(data) {
    if (!this.socket || this.socket.readyState !== WebSocket.OPEN) {
      console.error('WebSocket 未连接');
      return;
    }
    try {
      if (typeof data === 'object') {
        data = JSON.stringify(data);
      }
      this.socket.send(data);
    } catch (error) {
      console.error('发送数据失败:', error);
    }
  }

  /**
   * Called when the connection opens
   * @private
   */
  _onOpen() {
    console.log('WebSocket 连接成功');
    this.stateHandle(0);
    // Send the initial recognition config
    const initConfig = {
      chunk_size: [5, 10, 5],
      wav_name: 'h5',
      is_speaking: true,
      chunk_interval: 10,
      itn: this.config.itn || false,
      mode: this.config.mode || '2pass',
      ...this.config.initConfig
    };
    this.send(initConfig);
  }

  /**
   * Called when the connection closes
   * @private
   */
  _onClose() {
    console.log('WebSocket 连接关闭');
    this.stateHandle(1);
  }

  /**
   * Called when a message is received
   * @private
   */
  _onMessage(event) {
    this.msgHandle(event);
  }

  /**
   * Called when an error occurs
   * @private
   */
  _onError(error) {
    console.error('WebSocket 错误:', error);
    this.stateHandle(2);
  }
}

export default WebSocketClient;
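A minimal usage sketch of the class above; the server URL, handlers, and logging are illustrative placeholders, not part of the commit:

    import WebSocketClient from '@/utils/websocket'

    const client = new WebSocketClient({
      mode: '2pass',
      itn: true,
      msgHandle: (event) => {
        // the ASR server pushes incremental recognition results as JSON
        const data = JSON.parse(event.data)
        if (data.text) console.log('partial text:', data.text)
      },
      stateHandle: (state) => {
        // 0 = opened (init config already sent), 1 = closed, 2 = error
        if (state === 0) {
          // safe to stream audio frames from here on
          client.send({ is_speaking: true, audio_data: [/* PCM samples */] })
          client.send({ is_speaking: false }) // end of utterance
          client.disconnect()
        }
      }
    })

    client.connect('wss://asr.example.com/')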

Voice input modal component (.vue)

@@ -15,8 +15,16 @@
       <!-- Voice Input Modal -->
       <div class="voice-input-modal-overlay" v-if="showVoiceModal">
         <div class="voice-bubble">
-          <!-- TODO: Add voice visualizer here -->
-          <div class="voice-wave-placeholder">语音波形区域</div>
+          <!-- Voice Visualizer -->
+          <AudioWaveform :pcmData="pcmData" />
+          <!-- Timer Display -->
+          <div class="timer" :class="{ 'warning': remainingTime <= 5 }">
+            {{ formatTime(remainingTime) }}
+          </div>
+          <!-- Real-time Text Display -->
+          <div class="real-time-text" v-if="realTimeText">
+            {{ realTimeText }}
+          </div>
         </div>
         <div class="voice-modal-controls">
           <div class="modal-control-item">
@@ -36,12 +44,12 @@
           >
           </el-button>
         </div>
-        <div class="modal-control-item">
+        <!-- <div class="modal-control-item">
           <span class="modal-btn-label">转文字</span>
           <el-button class="modal-btn text-send-btn" circle @click="sendAsText">
             <el-icon><ChatDotSquare /></el-icon>
           </el-button>
-        </div>
+        </div> -->
       </div>
       <div class="voice-modal-tip">{{ isRecording ? '正在录音...' : '点击麦克风开始录音' }}</div>
     </div>
@@ -61,6 +69,11 @@
 import { ref, onMounted, onUnmounted } from 'vue'
 import { ElButton, ElMessage, ElIcon } from 'element-plus'
 import { Microphone, ChatDotSquare, Close } from '@element-plus/icons-vue'
+import Recorder from 'recorder-core'
+import 'recorder-core/src/engine/pcm'
+import 'recorder-core/src/engine/wav'
+import WebSocketClient from '@/utils/websocket'
+import AudioWaveform from '@/components/AudioWaveform.vue'

 // Props definition
 const props = defineProps({
@@ -71,137 +84,224 @@ const props = defineProps({
 })

 // Emits definition
-const emit = defineEmits(['sendText'])
+const emit = defineEmits(['sendText', 'sendVoice'])

 // Refs related to voice input
 const isRecording = ref(false)
 const isVoiceToTextConverting = ref(false)
 const showVoiceModal = ref(false)
-let mediaRecorder = null
+const realTimeText = ref('')
+const remainingTime = ref(30)
+const pcmData = ref([])
+let recorder = null
+let websocketClient = null
+let timer = null
 let audioChunks = []

+// WebSocket configuration
+const wsConfig = {
+  url: 'wss://160.202.224.52:10096/',
+  initConfig: {
+    mode: '2pass',
+    wav_format: 'pcm',
+    chunk_size: [5, 10, 5],
+    itn: true
+  },
+  msgHandle: (event) => {
+    try {
+      const data = JSON.parse(event.data)
+      if (data.text) {
+        realTimeText.value = data.text
+      }
+    } catch (error) {
+      console.error('WebSocket message parse error:', error)
+    }
+  },
+  stateHandle: (state) => {
+    console.log('WebSocket state:', state)
+  }
+}
+
 // --- Voice Input Logic ---

-const openVoiceModal = () => {
-  if (props.isDisabled) return;
-  showVoiceModal.value = true;
-  // Reset state in case modal was closed unexpectedly before
-  isRecording.value = false;
-  audioChunks = [];
-}
-
-const closeVoiceModal = () => {
-  showVoiceModal.value = false;
-  if (isRecording.value) {
-    stopRecording(); // Stop recording if modal is closed while recording
-    isRecording.value = false;
-  }
-}
+const openVoiceModal = async () => {
+  if (props.isDisabled) return
+  showVoiceModal.value = true
+  isRecording.value = false
+  realTimeText.value = ''
+  remainingTime.value = 30
+  audioChunks = []
+  // Request microphone access and start recording immediately
+  try {
+    const stream = await navigator.mediaDevices.getUserMedia({ audio: true })
+    startRecording(stream)
+    isRecording.value = true
+    startTimer()
+    connectWebSocket()
+  } catch (err) {
+    ElMessage.error('无法访问麦克风,请检查权限设置')
+    console.error('获取麦克风权限失败:', err)
+    closeVoiceModal()
+  }
+}
+
+const closeVoiceModal = () => {
+  showVoiceModal.value = false
+  if (isRecording.value) {
+    stopRecording()
+    isRecording.value = false
+  }
+  clearInterval(timer)
+  // Release the recorder and its media stream
+  if (recorder) {
+    recorder.close(() => {
+      // Stop the underlying MediaStream tracks
+      if (recorder.stream) {
+        recorder.stream.getTracks().forEach(track => track.stop())
+      }
+      recorder = null
+    })
+  }
+  // Disconnect the WebSocket
+  if (websocketClient) {
+    websocketClient.disconnect()
+    websocketClient = null
+  }
+  // Reset transient state
+  audioChunks = []
+  realTimeText.value = ''
+  remainingTime.value = 30
+}

 const cancelVoiceInputFromModal = () => {
-  closeVoiceModal();
+  closeVoiceModal()
 }

 const confirmVoiceInput = async () => {
-  if (!isRecording.value) {
-    // Start recording
-    try {
-      const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
-      startRecording(stream);
-      isRecording.value = true;
-    } catch (err) {
-      ElMessage.error('无法访问麦克风,请检查权限设置');
-      console.error('获取麦克风权限失败:', err);
-      closeVoiceModal(); // Close modal on error
-    }
-  } else {
-    // Stop recording and process
-    stopRecording();
-    isRecording.value = false;
+  if (isRecording.value) {
+    stopRecording()
+    isRecording.value = false
+    clearInterval(timer)
     if (audioChunks.length === 0) {
-      ElMessage.warning('录音时间太短');
-      closeVoiceModal();
-      return;
+      ElMessage.warning('录音时间太短')
+      closeVoiceModal()
+      return
     }
-    isVoiceToTextConverting.value = true; // Show converting overlay
-    closeVoiceModal(); // Close the modal after stopping
-
-    // Simulate conversion & emit result
-    const audioBlob = new Blob(audioChunks, { type: 'audio/wav' });
-    console.log('录音完成,模拟转换,音频大小:', audioBlob.size);
-    setTimeout(() => {
-      const simulatedText = "模拟语音识别结果" + Date.now(); // Example text
-      emit('sendText', simulatedText);
-      isVoiceToTextConverting.value = false;
-      audioChunks = []; // Clear chunks after processing
-    }, 1500);
+    const audioBlob = new Blob(audioChunks, { type: 'audio/wav' })
+    emit('sendVoice', audioBlob)
+    closeVoiceModal()
   }
 }

 const sendAsText = () => {
-  // Currently, just acts like confirm/stop
   if (isRecording.value) {
-    confirmVoiceInput();
+    stopRecording()
+    isRecording.value = false
+    clearInterval(timer)
+    if (realTimeText.value) {
+      emit('sendText', realTimeText.value)
+    } else {
+      ElMessage.warning('没有识别到文字')
+    }
+    closeVoiceModal()
   } else {
-    // If not recording, maybe do nothing or just close?
-    closeVoiceModal();
+    closeVoiceModal()
   }
 }

 const startRecording = (stream) => {
-  audioChunks = []; // Clear previous chunks
-  mediaRecorder = new MediaRecorder(stream);
-
-  mediaRecorder.ondataavailable = (event) => {
-    if (event.data.size > 0) {
-      audioChunks.push(event.data);
-    }
-  };
-
-  mediaRecorder.onstop = () => {
-    // Stop associated media stream tracks
-    mediaRecorder.stream.getTracks().forEach(track => track.stop());
-    // Note: Actual processing is now triggered in confirmVoiceInput after calling stopRecording
-  };
-
-  mediaRecorder.onerror = (event) => {
-    console.error('MediaRecorder error:', event.error);
-    ElMessage.error('录音出错');
-    isRecording.value = false; // Reset state on error
-    closeVoiceModal();
-  };
-
-  mediaRecorder.start();
+  audioChunks = []
+  recorder = new Recorder({
+    type: 'pcm',        // raw PCM output
+    sampleRate: 16000,  // 16 kHz sample rate
+    bitRate: 16,        // 16-bit samples
+    onProcess: (buffers, powerLevel, duration, sampleRate) => {
+      const pcm = buffers[buffers.length - 1]
+      if (pcm && pcm.length > 0) {
+        pcmData.value = Array.from(pcm)
+      }
+      // Forward the latest PCM frame over the WebSocket
+      if (websocketClient && websocketClient.socket.readyState === WebSocket.OPEN) {
+        const pcmDataSend = buffers[buffers.length - 1]
+        websocketClient.send({
+          is_speaking: true,
+          audio_data: pcmDataSend
+        })
+      }
+    }
+  })

-}
+  recorder.open(() => {
+    recorder.start()
+  }, (msg, isUserNotAllow) => {
+    ElMessage.error('录音失败: ' + msg)
+    closeVoiceModal()
+  })
+}

 const stopRecording = () => {
-  if (mediaRecorder && mediaRecorder.state === 'recording') {
-    try {
-      mediaRecorder.stop();
-    } catch (error) {
-      console.error("Error stopping MediaRecorder:", error);
-      // Attempt to stop tracks directly as a fallback
-      if (mediaRecorder.stream) {
-        mediaRecorder.stream.getTracks().forEach(track => track.stop());
-      }
-    }
-  }
-  // Reset recorder instance
-  mediaRecorder = null;
+  if (recorder) {
+    recorder.stop((blob, duration) => {
+      // Convert PCM to WAV before saving
+      Recorder.pcm2wav({
+        blob: blob,
+        sampleRate: 16000,
+        bitRate: 16
+      }, (wavBlob) => {
+        audioChunks.push(wavBlob)
+        // Send end signal to WebSocket
+        if (websocketClient && websocketClient.socket.readyState === WebSocket.OPEN) {
+          websocketClient.send({
+            is_speaking: false
+          })
+        }
+      }, (msg) => {
+        console.error('PCM转WAV失败:', msg)
+      })
+    }, (msg) => {
+      console.error('录音停止失败:', msg)
+    })
+  }
+}
+
+const startTimer = () => {
+  timer = setInterval(() => {
+    remainingTime.value--
+    if (remainingTime.value <= 0) {
+      stopRecording()
+      isRecording.value = false
+      clearInterval(timer)
+    }
+  }, 1000)
+}
+
+const formatTime = (seconds) => {
+  const mins = Math.floor(seconds / 60)
+  const secs = seconds % 60
+  return `${mins}:${secs.toString().padStart(2, '0')}`
+}
+
+const connectWebSocket = () => {
+  websocketClient = new WebSocketClient(wsConfig)
+  websocketClient.connect(wsConfig.url)
 }

 // --- Lifecycle hooks ---
 onMounted(() => {
   if (!('MediaRecorder' in window) || !navigator.mediaDevices) {
-    console.warn('浏览器不支持语音录制功能');
-    // Optionally disable voice functionality entirely
+    console.warn('浏览器不支持语音录制功能')
   }
 })

 onUnmounted(() => {
-  stopRecording(); // Ensure recording is stopped on component unmount
+  closeVoiceModal()
 })
@@ -247,8 +347,40 @@ onUnmounted(() => {
   text-align: center;
   position: relative;

-  .voice-wave-placeholder {
+  .voice-wave {
+    width: 100%;
+    height: 48px;
+    display: flex;
+    align-items: center;
+    justify-content: center;
+    margin: 10px 0;
+    background: transparent;
+  }
+
+  .voice-wave canvas {
+    width: 240px;
+    height: 48px;
+    display: block;
+    background: transparent;
+  }
+
+  .timer {
     font-size: 14px;
+    color: white;
+    margin: 5px 0;
+    &.warning {
+      color: #ff4d4f;
+      animation: blink 1s infinite;
+    }
+  }
+
+  .real-time-text {
+    font-size: 14px;
+    color: white;
+    margin: 10px 0;
+    min-height: 20px;
+    word-break: break-all;
   }

   &::after {
@@ -395,4 +527,29 @@ html.dark .voice-to-text-content {
 html.dark .converting-text {
   color: #eee;
 }
+
+.timer {
+  font-size: 14px;
+  color: white;
+  margin: 5px 0;
+  &.warning {
+    color: #ff4d4f;
+    animation: blink 1s infinite;
+  }
+}
+
+.real-time-text {
+  font-size: 14px;
+  color: white;
+  margin: 10px 0;
+  min-height: 20px;
+  word-break: break-all;
+}
+
+@keyframes blink {
+  0% { opacity: 1; }
+  50% { opacity: 0.5; }
+  100% { opacity: 1; }
+}

 </style>
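For reference, the component now emits sendText (the recognized transcript) and sendVoice (the finished recording as a WAV Blob). A hypothetical parent usage sketch; the component name, import path, and handler names are invented for illustration:

    <!-- Hypothetical parent component; <VoiceInput> stands in for the component changed above. -->
    <template>
      <VoiceInput :isDisabled="sending" @send-text="handleText" @send-voice="handleVoice" />
    </template>

    <script setup>
    import { ref } from 'vue'
    import VoiceInput from '@/components/VoiceInput.vue' // illustrative path

    const sending = ref(false)

    // Recognized text streamed back from the ASR server
    const handleText = (text) => {
      console.log('voice-to-text result:', text)
    }

    // The full recording as a WAV Blob, e.g. for upload or local playback
    const handleVoice = (wavBlob) => {
      const url = URL.createObjectURL(wavBlob)
      new Audio(url).play()
    }
    </script>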

vite.config.js

@@ -58,21 +58,21 @@ export default defineConfig(({ command, mode }) => {
         rewrite: (path) => path.replace(/^\/api/, ''),
         secure: false,
         configure: (proxy, options) => {
-          proxy.on('proxyReq', (proxyReq, req, res) => {
-            const targetOrigin = new URL(env.VITE_APP_BASE_API).origin;
-            proxyReq.setHeader('Origin', targetOrigin);
-            if (req.originalUrl && req.originalUrl.includes('chat-messages')) {
-              proxyReq.setHeader('Accept', 'text/event-stream');
-            }
-          });
-          proxy.on('proxyRes', (proxyRes, req, res) => {
-            if (req.originalUrl && req.originalUrl.includes('chat-messages')) {
-              proxyRes.headers['content-type'] = 'text/event-stream';
-              proxyRes.headers['Cache-Control'] = 'no-cache';
-              proxyRes.headers['Connection'] = 'keep-alive';
-            }
-          });
+          // proxy.on('proxyReq', (proxyReq, req, res) => {
+          //   const targetOrigin = new URL(env.VITE_APP_BASE_API).origin;
+          //   proxyReq.setHeader('Origin', targetOrigin);
+          //   if (req.originalUrl && req.originalUrl.includes('chat-messages')) {
+          //     proxyReq.setHeader('Accept', 'text/event-stream');
+          //   }
+          // });
+          // proxy.on('proxyRes', (proxyRes, req, res) => {
+          //   if (req.originalUrl && req.originalUrl.includes('chat-messages')) {
+          //     proxyRes.headers['content-type'] = 'text/event-stream';
+          //     proxyRes.headers['Cache-Control'] = 'no-cache';
+          //     proxyRes.headers['Connection'] = 'keep-alive';
+          //   }
+          // });
         }
       }
     }