feat(voice): implement voice input

- Add a generic WebSocket utility class
- Integrate the recorder-core library for audio recording
- Add audio waveform rendering and real-time transcript display
- Stream recorded audio to the WebSocket server in real time
- Polish the voice input UI and interaction logic
Lexcubia 2025-04-27 11:51:47 +08:00
parent 6a20e26e5d
commit 15514759e2
4 changed files with 395 additions and 107 deletions
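At a glance, the streaming flow this commit implements reduces to three steps. The sketch below reconstructs them from the handlers in the diffs that follow; the helper names are illustrative, and an already-open WebSocket `ws` is assumed:

// Illustrative helpers reconstructed from _onOpen, onProcess and stopRecording below.

// 1. Right after connecting, send the recognition config as a JSON text frame.
function startSession(ws) {
  ws.send(JSON.stringify({
    mode: '2pass', // real-time partial results plus a second, final pass
    chunk_size: [5, 10, 5],
    chunk_interval: 10,
    wav_name: 'h5',
    is_speaking: true,
    itn: false
  }))
}

// 2. While recording, stream 16 kHz / 16-bit mono PCM chunks as binary frames.
function streamChunk(ws, pcmInt16) {
  ws.send(pcmInt16) // WebSocket.send accepts an ArrayBufferView directly
}

// 3. When recording stops, signal the end of the utterance.
function endSession(ws) {
  ws.send(JSON.stringify({ is_speaking: false }))
}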

package.json

@@ -42,10 +42,13 @@
"echarts": "^5.6.0",
"element-plus": "^2.9.7",
"marked": "^15.0.8",
"pcm": "^1.0.3",
"pinia": "^3.0.2",
"pinia-plugin-persistedstate": "^4.2.0",
"recorder-core": "^1.3.25011100",
"vue": "^3.5.13",
"vue-router": "^4.5.0"
"vue-router": "^4.5.0",
"wav": "^1.0.2"
},
"devDependencies": {
"@types/node": "^20.11.19",

src/utils/websocket.js (new file, 128 lines)

@@ -0,0 +1,128 @@
/**
* Generic WebSocket utility class
* @author Lexcubia
* @date 2024-04-25
*/
class WebSocketClient {
constructor(config) {
this.socket = null;
this.msgHandle = config.msgHandle;
this.stateHandle = config.stateHandle;
this.config = config;
}
/**
* Open a WebSocket connection
* @param {string} url - WebSocket server address
* @returns {boolean} - whether a connection attempt was started (the socket opens asynchronously)
*/
connect(url) {
if (!url) {
console.error('WebSocket URL 不能为空');
return false;
}
if (!/^wss?:\/\//.test(url)) {
console.error('WebSocket URL 格式不正确');
return false;
}
if (!('WebSocket' in window)) {
console.error('当前浏览器不支持 WebSocket');
return false;
}
try {
this.socket = new WebSocket(url);
this.socket.onopen = this._onOpen.bind(this);
this.socket.onclose = this._onClose.bind(this);
this.socket.onmessage = this._onMessage.bind(this);
this.socket.onerror = this._onError.bind(this);
return true;
} catch (error) {
console.error('WebSocket 连接失败:', error);
return false;
}
}
/**
* Close the WebSocket connection
*/
disconnect() {
if (this.socket) {
this.socket.close();
this.socket = null;
}
}
/**
* Send data
* @param {any} data - payload to send; plain objects are JSON-encoded, binary frames are sent as-is
*/
send(data) {
if (!this.socket || this.socket.readyState !== WebSocket.OPEN) {
console.error('WebSocket 未连接');
return;
}
try {
// Only JSON-encode plain objects; pass binary frames (ArrayBuffer,
// typed arrays, Blob) through untouched so audio data is not mangled.
if (typeof data === 'object' && !(data instanceof ArrayBuffer) && !ArrayBuffer.isView(data) && !(data instanceof Blob)) {
data = JSON.stringify(data);
}
this.socket.send(data);
} catch (error) {
console.error('发送数据失败:', error);
}
}
/**
* Callback invoked when the connection opens
* @private
*/
_onOpen() {
console.log('WebSocket 连接成功');
this.stateHandle(0); // 0 = connected
// Send the initial recognition config to start a session
const initConfig = {
chunk_size: [5, 10, 5],
wav_name: 'h5',
is_speaking: true,
chunk_interval: 10,
itn: this.config.itn || false,
mode: this.config.mode || '2pass',
...this.config.initConfig
};
this.send(initConfig);
}
/**
* Callback invoked when the connection closes
* @private
*/
_onClose() {
console.log('WebSocket 连接关闭');
this.stateHandle(1); // 1 = closed
}
/**
* Callback invoked for each incoming message
* @private
*/
_onMessage(event) {
this.msgHandle(event);
}
/**
* Callback invoked on connection errors
* @private
*/
_onError(error) {
console.error('WebSocket 错误:', error);
this.stateHandle(2); // 2 = error
}
}
export default WebSocketClient;
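For context, a minimal usage sketch of the class above; the URL is a placeholder and the handler bodies are illustrative:

import WebSocketClient from '@/utils/websocket'

const client = new WebSocketClient({
  mode: '2pass',
  itn: true,
  msgHandle: (event) => {
    // The server replies with JSON text frames.
    const data = JSON.parse(event.data)
    if (data.text) console.log('transcript:', data.text)
  },
  stateHandle: (state) => {
    // 0 = connected, 1 = closed, 2 = error (see the callbacks above)
    console.log('socket state:', state)
  }
})

// connect() returns false for an empty or malformed URL or an unsupported browser.
if (client.connect('wss://example.com:10096/')) {
  // ...stream audio with client.send(...), then end the utterance:
  // client.send({ is_speaking: false })
  // client.disconnect()
}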


@@ -15,8 +15,16 @@
<!-- Voice Input Modal -->
<div class="voice-input-modal-overlay" v-if="showVoiceModal">
<div class="voice-bubble">
<!-- TODO: Add voice visualizer here -->
<div class="voice-wave-placeholder">语音波形区域</div>
<!-- Voice Visualizer -->
<AudioWaveform :pcmData="pcmData" />
<!-- Timer Display -->
<div class="timer" :class="{ 'warning': remainingTime <= 5 }">
{{ formatTime(remainingTime) }}
</div>
<!-- Real-time Text Display -->
<div class="real-time-text" v-if="realTimeText">
{{ realTimeText }}
</div>
</div>
<div class="voice-modal-controls">
<div class="modal-control-item">
@@ -36,12 +44,12 @@
>
</el-button>
</div>
<div class="modal-control-item">
<!-- <div class="modal-control-item">
<span class="modal-btn-label">转文字</span>
<el-button class="modal-btn text-send-btn" circle @click="sendAsText">
<el-icon><ChatDotSquare /></el-icon>
</el-button>
</div>
</div> -->
</div>
<div class="voice-modal-tip">{{ isRecording ? '正在录音...' : '点击麦克风开始录音' }}</div>
</div>
@@ -61,6 +69,11 @@
import { ref, onMounted, onUnmounted } from 'vue'
import { ElButton, ElMessage, ElIcon } from 'element-plus'
import { Microphone, ChatDotSquare, Close } from '@element-plus/icons-vue'
import Recorder from 'recorder-core'
import 'recorder-core/src/engine/pcm'
import 'recorder-core/src/engine/wav'
import WebSocketClient from '@/utils/websocket'
import AudioWaveform from '@/components/AudioWaveform.vue'
// Props definition
const props = defineProps({
@@ -71,137 +84,224 @@ const props = defineProps({
})
// Emits definition
const emit = defineEmits(['sendText'])
const emit = defineEmits(['sendText', 'sendVoice'])
// Refs related to voice input
const isRecording = ref(false)
const isVoiceToTextConverting = ref(false)
const showVoiceModal = ref(false)
let mediaRecorder = null
const realTimeText = ref('')
const remainingTime = ref(30)
const pcmData = ref([])
let recorder = null
let websocketClient = null
let timer = null
let audioChunks = []
// --- Voice Input Logic ---
const openVoiceModal = () => {
if (props.isDisabled) return;
showVoiceModal.value = true;
// Reset state in case modal was closed unexpectedly before
isRecording.value = false;
audioChunks = [];
}
const closeVoiceModal = () => {
showVoiceModal.value = false;
if (isRecording.value) {
stopRecording(); // Stop recording if modal is closed while recording
isRecording.value = false;
// WebSocket configuration
const wsConfig = {
url: 'wss://160.202.224.52:10096/',
initConfig: {
mode: '2pass',
wav_format: 'pcm',
chunk_size: [5, 10, 5],
itn: true
},
msgHandle: (event) => {
try {
const data = JSON.parse(event.data)
if (data.text) {
realTimeText.value = data.text
}
} catch (error) {
console.error('WebSocket message parse error:', error)
}
},
stateHandle: (state) => {
console.log('WebSocket state:', state)
}
}
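// For reference, a single incoming recognition result might look like the
// sketch below; msgHandle above only reads `text`, so every other field is
// an assumption about the server's payload shape.
// const exampleMessage = {
//   mode: '2pass-online', // assumed: which recognition pass produced it
//   wav_name: 'h5',
//   text: 'partial transcript', // the field rendered in real time
//   is_final: false // assumed: whether this segment is final
// }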
// --- Voice Input Logic ---
const openVoiceModal = async () => {
if (props.isDisabled) return
showVoiceModal.value = true
isRecording.value = false
realTimeText.value = ''
remainingTime.value = 30
audioChunks = []
// Request microphone access, then start recording and streaming
try {
const stream = await navigator.mediaDevices.getUserMedia({ audio: true })
startRecording(stream)
isRecording.value = true
startTimer()
connectWebSocket()
} catch (err) {
ElMessage.error('无法访问麦克风,请检查权限设置')
console.error('获取麦克风权限失败:', err)
closeVoiceModal()
}
}
const closeVoiceModal = () => {
showVoiceModal.value = false
if (isRecording.value) {
stopRecording()
isRecording.value = false
}
clearInterval(timer)
// Release the recorder and its media stream
if (recorder) {
recorder.close(() => {
// Stop all tracks of the captured media stream
if (recorder.stream) {
recorder.stream.getTracks().forEach(track => track.stop())
}
recorder = null
})
}
// Tear down the WebSocket connection
if (websocketClient) {
websocketClient.disconnect()
websocketClient = null
}
// Reset transient UI state
audioChunks = []
realTimeText.value = ''
remainingTime.value = 30
}
const cancelVoiceInputFromModal = () => {
closeVoiceModal();
closeVoiceModal()
}
const confirmVoiceInput = async () => {
if (!isRecording.value) {
// Start recording
try {
const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
startRecording(stream);
isRecording.value = true;
} catch (err) {
ElMessage.error('无法访问麦克风,请检查权限设置');
console.error('获取麦克风权限失败:', err);
closeVoiceModal(); // Close modal on error
}
} else {
// Stop recording and process
stopRecording();
isRecording.value = false;
if (isRecording.value) {
stopRecording()
isRecording.value = false
clearInterval(timer)
if (audioChunks.length === 0) {
ElMessage.warning('录音时间太短');
closeVoiceModal();
return;
ElMessage.warning('录音时间太短')
closeVoiceModal()
return
}
isVoiceToTextConverting.value = true; // Show converting overlay
closeVoiceModal(); // Close the modal after stopping
// Simulate conversion & emit result
const audioBlob = new Blob(audioChunks, { type: 'audio/wav' });
console.log('录音完成,模拟转换,音频大小:', audioBlob.size);
setTimeout(() => {
const simulatedText = "模拟语音识别结果" + Date.now(); // Example text
emit('sendText', simulatedText);
isVoiceToTextConverting.value = false;
audioChunks = []; // Clear chunks after processing
}, 1500);
const audioBlob = new Blob(audioChunks, { type: 'audio/wav' })
emit('sendVoice', audioBlob)
closeVoiceModal()
}
}
const sendAsText = () => {
// Currently, just acts like confirm/stop
if (isRecording.value) {
confirmVoiceInput();
stopRecording()
isRecording.value = false
clearInterval(timer)
if (realTimeText.value) {
emit('sendText', realTimeText.value)
} else {
ElMessage.warning('没有识别到文字')
}
closeVoiceModal()
} else {
// If not recording, maybe do nothing or just close?
closeVoiceModal();
closeVoiceModal()
}
}
const startRecording = (stream) => {
audioChunks = []; // Clear previous chunks
mediaRecorder = new MediaRecorder(stream);
mediaRecorder.ondataavailable = (event) => {
if (event.data.size > 0) {
audioChunks.push(event.data);
audioChunks = []
recorder = new Recorder({
type: 'pcm', // capture raw PCM frames
sampleRate: 16000, // 16 kHz sample rate
bitRate: 16, // 16-bit samples
onProcess: (buffers, powerLevel, duration, sampleRate) => {
const pcm = buffers[buffers.length - 1]
if (pcm && pcm.length > 0) {
pcmData.value = Array.from(pcm)
}
// Stream the newest PCM chunk to the server in real time. Send it as a
// binary frame: JSON-encoding an Int16Array would garble the audio.
if (websocketClient && websocketClient.socket && websocketClient.socket.readyState === WebSocket.OPEN) {
const pcmDataSend = buffers[buffers.length - 1]
websocketClient.send(pcmDataSend)
}
}
};
mediaRecorder.onstop = () => {
// Stop associated media stream tracks
mediaRecorder.stream.getTracks().forEach(track => track.stop());
// Note: Actual processing is now triggered in confirmVoiceInput after calling stopRecording
};
})
mediaRecorder.onerror = (event) => {
console.error('MediaRecorder error:', event.error);
ElMessage.error('录音出错');
isRecording.value = false; // Reset state on error
closeVoiceModal();
};
mediaRecorder.start();
recorder.open(() => {
recorder.start()
}, (msg, isUserNotAllow) => {
ElMessage.error('录音失败: ' + msg)
closeVoiceModal()
})
}
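// Note on the onProcess callback above: recorder-core passes the cumulative
// list of PCM buffers, so buffers[buffers.length - 1] is just the newest
// chunk, an Int16Array at the configured sample rate. Equivalent sketch:
// const onProcess = (buffers, powerLevel, duration, sampleRate) => {
//   const newest = buffers[buffers.length - 1]
//   console.log(`${newest.length} samples @ ${sampleRate} Hz`)
// }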
const stopRecording = () => {
if (mediaRecorder && mediaRecorder.state === 'recording') {
try {
mediaRecorder.stop();
} catch (error) {
console.error("Error stopping MediaRecorder:", error);
// Attempt to stop tracks directly as a fallback
if (mediaRecorder.stream) {
mediaRecorder.stream.getTracks().forEach(track => track.stop());
if (recorder) {
recorder.stop((blob, duration) => {
// Convert PCM to WAV before saving
Recorder.pcm2wav({
blob: blob,
sampleRate: 16000,
bitRate: 16
}, (wavBlob) => {
audioChunks.push(wavBlob)
// Send end signal to WebSocket
if (websocketClient && websocketClient.socket.readyState === WebSocket.OPEN) {
websocketClient.send({
is_speaking: false
})
}
}
}, (msg) => {
console.error('PCM转WAV失败:', msg)
})
}, (msg) => {
console.error('录音停止失败:', msg)
})
}
// Reset recorder instance
mediaRecorder = null;
}
const startTimer = () => {
timer = setInterval(() => {
remainingTime.value--
if (remainingTime.value <= 0) {
stopRecording()
isRecording.value = false
clearInterval(timer)
}
}, 1000)
}
const formatTime = (seconds) => {
const mins = Math.floor(seconds / 60)
const secs = seconds % 60
return `${mins}:${secs.toString().padStart(2, '0')}`
}
const connectWebSocket = () => {
websocketClient = new WebSocketClient(wsConfig)
websocketClient.connect(wsConfig.url)
}
// --- Lifecycle hooks ---
onMounted(() => {
if (!('MediaRecorder' in window) || !navigator.mediaDevices) {
console.warn('浏览器不支持语音录制功能');
// Optionally disable voice functionality entirely
console.warn('浏览器不支持语音录制功能')
}
})
onUnmounted(() => {
stopRecording(); // Ensure recording is stopped on component unmount
closeVoiceModal()
})
</script>
@@ -247,8 +347,40 @@ onUnmounted(() => {
text-align: center;
position: relative;
.voice-wave-placeholder {
.voice-wave {
width: 100%;
height: 48px;
display: flex;
align-items: center;
justify-content: center;
margin: 10px 0;
background: transparent;
}
.voice-wave canvas {
width: 240px;
height: 48px;
display: block;
background: transparent;
}
.timer {
font-size: 14px;
color: white;
margin: 5px 0;
&.warning {
color: #ff4d4f;
animation: blink 1s infinite;
}
}
.real-time-text {
font-size: 14px;
color: white;
margin: 10px 0;
min-height: 20px;
word-break: break-all;
}
&::after {
@@ -395,4 +527,29 @@ html.dark .voice-to-text-content {
html.dark .converting-text {
color: #eee;
}
.timer {
font-size: 14px;
color: white;
margin: 5px 0;
&.warning {
color: #ff4d4f;
animation: blink 1s infinite;
}
}
.real-time-text {
font-size: 14px;
color: white;
margin: 10px 0;
min-height: 20px;
word-break: break-all;
}
@keyframes blink {
0% { opacity: 1; }
50% { opacity: 0.5; }
100% { opacity: 1; }
}
</style>

vite.config.js

@@ -58,21 +58,21 @@ export default defineConfig(({ command, mode }) => {
rewrite: (path) => path.replace(/^\/api/, ''),
secure: false,
configure: (proxy, options) => {
proxy.on('proxyReq', (proxyReq, req, res) => {
const targetOrigin = new URL(env.VITE_APP_BASE_API).origin;
proxyReq.setHeader('Origin', targetOrigin);
// proxy.on('proxyReq', (proxyReq, req, res) => {
// const targetOrigin = new URL(env.VITE_APP_BASE_API).origin;
// proxyReq.setHeader('Origin', targetOrigin);
if (req.originalUrl && req.originalUrl.includes('chat-messages')) {
proxyReq.setHeader('Accept', 'text/event-stream');
}
});
proxy.on('proxyRes', (proxyRes, req, res) => {
if (req.originalUrl && req.originalUrl.includes('chat-messages')) {
proxyRes.headers['content-type'] = 'text/event-stream';
proxyRes.headers['Cache-Control'] = 'no-cache';
proxyRes.headers['Connection'] = 'keep-alive';
}
});
// if (req.originalUrl && req.originalUrl.includes('chat-messages')) {
// proxyReq.setHeader('Accept', 'text/event-stream');
// }
// });
// proxy.on('proxyRes', (proxyRes, req, res) => {
// if (req.originalUrl && req.originalUrl.includes('chat-messages')) {
// proxyRes.headers['content-type'] = 'text/event-stream';
// proxyRes.headers['Cache-Control'] = 'no-cache';
// proxyRes.headers['Connection'] = 'keep-alive';
// }
// });
}
}
}